library(stringr)
library(sva)
## Loading required package: mgcv
## Loading required package: nlme
## This is mgcv 1.8-32. For overview type 'help("mgcv-package")'.
## Loading required package: genefilter
## Loading required package: BiocParallel
library(ASSIGN)
library(data.table)
suppressMessages(require(Seurat))
suppressMessages(require(ggplot2))
suppressMessages(require(cowplot))
suppressMessages(require(scater))
suppressMessages(require(scran))
suppressMessages(require(BiocParallel))
suppressMessages(require(BiocNeighbors))
#generate 25 gene expression signature
setwd("/home/li/covid19/result01/total")
cell24_4<-read.table("24celllines_4patients_norm.txt")
write.csv(cell24_4,"cell24_4.csv")

cell24_4<-read.csv("cell24_4.csv",header=TRUE)
cell24_4<-(log2(data.frame(cell24_4,check.names = F, row.names=1)+1))
colnames(cell24_4)
##  [1] "S5_A549_Mock_1"        "S5_A549_Mock_2"        "S5_A549_Mock_3"       
##  [4] "S6_A549.ACE2_Mock_1"   "S6_A549.ACE2_Mock_2"   "S6_A549.ACE2_Mock_3"  
##  [7] "S7_Calu3_Mock_1"       "S7_Calu3_Mock_2"       "S7_Calu3_Mock_3"      
## [10] "S16_A549.ACE2_Mock_1"  "S16_A549.ACE2_Mock_2"  "S16_A549.ACE2_Mock_3" 
## [13] "S15_Healthy_2"         "S15_Healthy_1"         "S5_A549_CoV.2_1"      
## [16] "S5_A549_CoV.2_2"       "S5_A549_CoV.2_3"       "S6_A549.ACE2_CoV.2_1" 
## [19] "S6_A549.ACE2_CoV.2_2"  "S6_A549.ACE2_CoV.2_3"  "S7_Calu3_CoV.2_1"     
## [22] "S7_Calu3_CoV.2_2"      "S7_Calu3_CoV.2_3"      "S16_A549.ACE2_CoV.2_1"
## [25] "S16_A549.ACE2_CoV.2_2" "S16_A549.ACE2_CoV.2_3" "S15_COV2_2"           
## [28] "S15_COV2_1"
#rownames(cell24_4)
plot(hclust(dist(t(cell24_4)),method="complete"))

cell24_4_filt<-(cell24_4[apply(cell24_4==0,1,mean)<0.1,])
pca<-prcomp(t(cell24_4_filt))
plot(pca)

{plot(pca$x[,1],pca$x[,2])
  points(pca$x[1:14,1],pca$x[1:14,2],col=2,pch=2)
  points(pca$x[15:28,1],pca$x[15:28,2],col=3,pch=2)
}

which(pca$x[,1]< -100)
## named integer(0)
which(pca$x[,1]< -20)
##        S5_A549_Mock_1        S5_A549_Mock_2        S5_A549_Mock_3 
##                     1                     2                     3 
##   S6_A549.ACE2_Mock_1   S6_A549.ACE2_Mock_2   S6_A549.ACE2_Mock_3 
##                     4                     5                     6 
##       S7_Calu3_Mock_1       S7_Calu3_Mock_2       S7_Calu3_Mock_3 
##                     7                     8                     9 
##  S16_A549.ACE2_Mock_1  S16_A549.ACE2_Mock_2  S16_A549.ACE2_Mock_3 
##                    10                    11                    12 
##       S5_A549_CoV.2_1       S5_A549_CoV.2_2       S5_A549_CoV.2_3 
##                    15                    16                    17 
##  S6_A549.ACE2_CoV.2_1  S6_A549.ACE2_CoV.2_2  S6_A549.ACE2_CoV.2_3 
##                    18                    19                    20 
##      S7_Calu3_CoV.2_3 S16_A549.ACE2_CoV.2_1 S16_A549.ACE2_CoV.2_2 
##                    23                    24                    25 
## S16_A549.ACE2_CoV.2_3 
##                    26
which(pca$x[,1]< 0)
##        S5_A549_Mock_1        S5_A549_Mock_2        S5_A549_Mock_3 
##                     1                     2                     3 
##   S6_A549.ACE2_Mock_1   S6_A549.ACE2_Mock_2   S6_A549.ACE2_Mock_3 
##                     4                     5                     6 
##       S7_Calu3_Mock_1       S7_Calu3_Mock_2       S7_Calu3_Mock_3 
##                     7                     8                     9 
##  S16_A549.ACE2_Mock_1  S16_A549.ACE2_Mock_2  S16_A549.ACE2_Mock_3 
##                    10                    11                    12 
##       S5_A549_CoV.2_1       S5_A549_CoV.2_2       S5_A549_CoV.2_3 
##                    15                    16                    17 
##  S6_A549.ACE2_CoV.2_1  S6_A549.ACE2_CoV.2_2  S6_A549.ACE2_CoV.2_3 
##                    18                    19                    20 
##      S7_Calu3_CoV.2_1      S7_Calu3_CoV.2_2      S7_Calu3_CoV.2_3 
##                    21                    22                    23 
## S16_A549.ACE2_CoV.2_1 S16_A549.ACE2_CoV.2_2 S16_A549.ACE2_CoV.2_3 
##                    24                    25                    26
bat <-
  as.data.frame(cbind(
    colnames(cell24_4_filt),
    c(
      rep(1, 3),
      rep(2, 3),
      rep(3, 3),
      rep(4, 3),
      rep(5, 2),
      rep(1, 3),
      rep(2, 3),
      rep(3, 3),
      rep(4, 3),
      rep(5, 2)
    ),
    c(rep(1, 14), rep(2, 14))
  ))

mod<-model.matrix(~as.factor(bat[,3]), data=bat)

combat_cell24_4<-ComBat(dat = as.matrix(cell24_4_filt,),batch = (bat[,2]),mod=mod,par.prior = T)
## Found5batches
## Adjusting for1covariate(s) or covariate level(s)
## Standardizing Data across genes
## Fitting L/S model and finding priors
## Finding parametric adjustments
## Adjusting the Data
##PCA post combat
pca<-prcomp(t(combat_cell24_4))
plot(pca)

{plot(pca$x[,1],pca$x[,2])
  points(pca$x[1:14,1],pca$x[1:14,2], main="Top 2 PCs",col=2)
  points(pca$x[15:28,1],pca$x[15:28,2], main="Top 2 PCs",col=3)
}

which(pca$x[,1]< -50)
##      S7_Calu3_Mock_2 S16_A549.ACE2_Mock_1 S16_A549.ACE2_Mock_3 
##                    8                   10                   12 
##           S15_COV2_1 
##                   28
which(pca$x[,2]< -50)
## S15_Healthy_2 
##            13
c_mock<-as.matrix(combat_cell24_4[,c(1:12)])
c_cov<-as.matrix(combat_cell24_4[,c(15:26)])
test<-as.matrix(combat_cell24_4[,c(13:14,27:28)])
trainingLabela <- list(control=list(mock=1:12),cov=13:24)
basedir<-getwd()
sub_dir <- paste(basedir,paste("cov", 25, sep=""),sep='/')
dir.create(sub_dir)
set.seed(1220)
assign.wrapper(
  trainingData = cbind(c_mock, c_cov),
  testData = test,
  trainingLabel = trainingLabela,
  geneList = NULL,
  n_sigGene = 25,
  adaptive_B = T,
  adaptive_S = F,
  outputDir = sub_dir,
  p_beta = 0.01,
  theta0 = 0.05,
  theta1 = 0.9,
  iter = 2000,
  burn_in = 1000)
## Performing QC on the input data...
## Warning in assign.preprocess(trainingData, testData, anchorGenes, excludeGenes, : Control Labels DO NOT match the experimental Labels!
## Please make sure that you specify the correct indice for control and experimental samples in the trainingLabel!
## Generating starting/prior values for model parameters...
## Gene selection on cov pathway...
## | 0%                                  50%                                 100% |
## Estimating model parameters in the training dataset...
## Start Gibbs sampling...
## | 0%                                  50%                                 100% |
## Estimating model parameters in the test dataset...
## Start Gibbs sampling...
## | 0%                                  50%                                 100% |
## Outputing results...
#use series 15 to test positive
cell5_6_7_16_15<-read.table("56716_15positive.txt",header=TRUE)
write.csv(cell5_6_7_16_15,"cell5_6_7_16_15.csv")
mock_cov<-read.csv("cell5_6_7_16_15.csv", header = TRUE)
mock_cov<-(log2(data.frame(mock_cov,check.names = F, row.names=1)+1))
mock_cov_filt<-(mock_cov[apply(mock_cov==0,1,mean)<0.1,])
bat <-as.data.frame(cbind(
  colnames(cell5_6_7_16_15),
  c(rep(1, 3),rep(2, 3),rep(3, 3),rep(4, 3),rep(5, 2),rep(1, 3),rep(2, 3),rep(3, 3),rep(4, 3),rep(5, 2)),
  c(rep(1, 14), rep(2, 14))))
mod<-model.matrix(~as.factor(bat[,3]), data=bat)
combat_mock_cov<-ComBat(dat = as.matrix(mock_cov_filt,),batch = (bat[,2]),mod=mod,par.prior = T)
## Found5batches
## Adjusting for1covariate(s) or covariate level(s)
## Standardizing Data across genes
## Fitting L/S model and finding priors
## Finding parametric adjustments
## Adjusting the Data
c_mock<-as.matrix(combat_mock_cov[,c(1:12)])
c_cov<-as.matrix(combat_mock_cov[,c(15:26)])
test<-as.matrix(combat_mock_cov[,c(13:14,27:28)])

trainingLabela <- list(control=list(mock=1:12),cov=13:24)
genelist_25<-read.csv("signature_gene_list_prior_25yueli.csv")

basedir<-getwd()
sub_dir <- paste(basedir,paste("cov_positive", 25, sep=""),sep='/')
dir.create(sub_dir)
set.seed(1220)
assign.wrapper(
  trainingData = cbind(c_mock, c_cov),
  testData = test,
  trainingLabel = trainingLabela,
  geneList = list(genelist_25$X),
  adaptive_B = T,
  adaptive_S = F,
  outputDir = sub_dir,
  p_beta = 0.01,
  theta0 = 0.05,
  theta1 = 0.9,
  iter = 2000,
  burn_in = 1000)
## Performing QC on the input data...
## Warning in assign.preprocess(trainingData, testData, anchorGenes, excludeGenes, : Control Labels DO NOT match the experimental Labels!
## Please make sure that you specify the correct indice for control and experimental samples in the trainingLabel!
## Generating starting/prior values for model parameters...
## Estimating model parameters in the training dataset...
## Start Gibbs sampling...
## | 0%                                  50%                                 100% |
## Estimating model parameters in the test dataset...
## Start Gibbs sampling...
## | 0%                                  50%                                 100% |
## Outputing results...
#use series 2 to test negative
cell5_6_7_16_2<-read.table("56716_2negative.txt",header=TRUE)
write.csv(cell5_6_7_16_2,"cell5_6_7_16_2.csv")
mock_cov<-read.csv("cell5_6_7_16_2.csv", header = TRUE)
mock_cov<-(log2(data.frame(mock_cov,check.names = F, row.names=1)+1))
mock_cov_filt<-(mock_cov[apply(mock_cov==0,1,mean)<0.1,])
bat <-as.data.frame(cbind(
  colnames(cell5_6_7_16_2),
  c(rep(1, 3),rep(2, 3),rep(3, 3),rep(4, 3),rep(5, 3),rep(1, 3),rep(2, 3),rep(3, 3),rep(4, 3),rep(5, 3)),
  c(rep(1, 15), rep(2, 15))))
mod<-model.matrix(~as.factor(bat[,3]), data=bat)
combat_mock_cov<-ComBat(dat = as.matrix(mock_cov_filt,),batch = (bat[,2]),mod=mod,par.prior = T)
## Found5batches
## Adjusting for1covariate(s) or covariate level(s)
## Standardizing Data across genes
## Fitting L/S model and finding priors
## Finding parametric adjustments
## Adjusting the Data
c_mock<-as.matrix(combat_mock_cov[,c(1:12)])
c_cov<-as.matrix(combat_mock_cov[,c(16:27)])
test<-as.matrix(combat_mock_cov[,c(13:15,28:30)])

trainingLabela <- list(control=list(mock=1:12),cov=13:24)
genelist_25<-read.csv("signature_gene_list_prior_25yueli.csv")

basedir<-getwd()
sub_dir <- paste(basedir,paste("cov_negative", 25, sep=""),sep='/')
dir.create(sub_dir)
set.seed(1220)
assign.wrapper(
  trainingData = cbind(c_mock, c_cov),
  testData = test,
  trainingLabel = trainingLabela,
  geneList = list(genelist_25$X),
  adaptive_B = T,
  adaptive_S = F,
  outputDir = sub_dir,
  p_beta = 0.01,
  theta0 = 0.05,
  theta1 = 0.9,
  iter = 2000,
  burn_in = 1000)
## Performing QC on the input data...
## Warning in assign.preprocess(trainingData, testData, anchorGenes, excludeGenes, : Control Labels DO NOT match the experimental Labels!
## Please make sure that you specify the correct indice for control and experimental samples in the trainingLabel!
## Generating starting/prior values for model parameters...
## Estimating model parameters in the training dataset...
## Start Gibbs sampling...
## | 0%                                  50%                                 100% |
## Estimating model parameters in the test dataset...
## Start Gibbs sampling...
## | 0%                                  50%                                 100% |
## Outputing results...
# BALF
cell_5_6_7_16_BALF<-read.table("cell_5_6_7_16_BALF_norm.txt",head=TRUE)
write.csv(cell_5_6_7_16_BALF,"cell_5_6_7_16_BALF.csv")
cells_4_BALF<-read.csv("cell_5_6_7_16_BALF.csv",header=TRUE)
cells_4_BALF<-(log2(data.frame(cells_4_BALF,check.names = F, row.names=1)+1))
colnames(cells_4_BALF)
##  [1] "Series5_A549_Mock_1"             "Series5_A549_Mock_2"            
##  [3] "Series5_A549_Mock_3"             "Series6_A549.ACE2_Mock_1"       
##  [5] "Series6_A549.ACE2_Mock_2"        "Series6_A549.ACE2_Mock_3"       
##  [7] "Series7_Calu3_Mock_1"            "Series7_Calu3_Mock_2"           
##  [9] "Series7_Calu3_Mock_3"            "Series16_A549.ACE2_Mock_1"      
## [11] "Series16_A549.ACE2_Mock_2"       "Series16_A549.ACE2_Mock_3"      
## [13] "SRR10571724"                     "SRR10571730"                    
## [15] "SRR10571732"                     "Series5_A549_SARS.CoV.2_1"      
## [17] "Series5_A549_SARS.CoV.2_2"       "Series5_A549_SARS.CoV.2_3"      
## [19] "Series6_A549.ACE2_SARS.CoV.2_1"  "Series6_A549.ACE2_SARS.CoV.2_2" 
## [21] "Series6_A549.ACE2_SARS.CoV.2_3"  "Series7_Calu3_SARS.CoV.2_1"     
## [23] "Series7_Calu3_SARS.CoV.2_2"      "Series7_Calu3_SARS.CoV.2_3"     
## [25] "Series16_A549.ACE2_SARS.CoV.2_1" "Series16_A549.ACE2_SARS.CoV.2_2"
## [27] "Series16_A549.ACE2_SARS.CoV.2_3" "CRR119894"                      
## [29] "CRR119895"                       "CRR119896"                      
## [31] "CRR119897"
#rownames(cells_4_BALF)
plot(hclust(dist(t(cells_4_BALF)),method="complete"))##samples are rather clustering by type than the COV2 infection status

#Series 15 is furthest from the others and that makes sense since these are from patients rather than cell lines. We should exclude Series 15 from the signature generation dataset.

##filter all zeroes
cells_4_BALF_filt<-(cells_4_BALF[apply(cells_4_BALF==0,1,mean)<0.1,])
#precombat PCA
pca<-prcomp(t(cells_4_BALF_filt))
plot(pca)

{plot(pca$x[,1],pca$x[,2])
  points(pca$x[1:15,1],pca$x[1:15,2],col=2,pch=2)
  points(pca$x[16:31,1],pca$x[16:31,2],col=3,pch=2)
}

which(pca$x[,1]< -100)
## named integer(0)
which(pca$x[,1]< -20)
##             Series5_A549_Mock_1             Series5_A549_Mock_2 
##                               1                               2 
##             Series5_A549_Mock_3        Series6_A549.ACE2_Mock_1 
##                               3                               4 
##        Series6_A549.ACE2_Mock_2        Series6_A549.ACE2_Mock_3 
##                               5                               6 
##            Series7_Calu3_Mock_1            Series7_Calu3_Mock_2 
##                               7                               8 
##            Series7_Calu3_Mock_3       Series16_A549.ACE2_Mock_1 
##                               9                              10 
##       Series16_A549.ACE2_Mock_2       Series16_A549.ACE2_Mock_3 
##                              11                              12 
##       Series5_A549_SARS.CoV.2_1       Series5_A549_SARS.CoV.2_2 
##                              16                              17 
##       Series5_A549_SARS.CoV.2_3  Series6_A549.ACE2_SARS.CoV.2_1 
##                              18                              19 
##  Series6_A549.ACE2_SARS.CoV.2_2  Series6_A549.ACE2_SARS.CoV.2_3 
##                              20                              21 
##      Series7_Calu3_SARS.CoV.2_1      Series7_Calu3_SARS.CoV.2_2 
##                              22                              23 
##      Series7_Calu3_SARS.CoV.2_3 Series16_A549.ACE2_SARS.CoV.2_1 
##                              24                              25 
## Series16_A549.ACE2_SARS.CoV.2_2 Series16_A549.ACE2_SARS.CoV.2_3 
##                              26                              27
which(pca$x[,1]< 0)
##             Series5_A549_Mock_1             Series5_A549_Mock_2 
##                               1                               2 
##             Series5_A549_Mock_3        Series6_A549.ACE2_Mock_1 
##                               3                               4 
##        Series6_A549.ACE2_Mock_2        Series6_A549.ACE2_Mock_3 
##                               5                               6 
##            Series7_Calu3_Mock_1            Series7_Calu3_Mock_2 
##                               7                               8 
##            Series7_Calu3_Mock_3       Series16_A549.ACE2_Mock_1 
##                               9                              10 
##       Series16_A549.ACE2_Mock_2       Series16_A549.ACE2_Mock_3 
##                              11                              12 
##       Series5_A549_SARS.CoV.2_1       Series5_A549_SARS.CoV.2_2 
##                              16                              17 
##       Series5_A549_SARS.CoV.2_3  Series6_A549.ACE2_SARS.CoV.2_1 
##                              18                              19 
##  Series6_A549.ACE2_SARS.CoV.2_2  Series6_A549.ACE2_SARS.CoV.2_3 
##                              20                              21 
##      Series7_Calu3_SARS.CoV.2_1      Series7_Calu3_SARS.CoV.2_2 
##                              22                              23 
##      Series7_Calu3_SARS.CoV.2_3 Series16_A549.ACE2_SARS.CoV.2_1 
##                              24                              25 
## Series16_A549.ACE2_SARS.CoV.2_2 Series16_A549.ACE2_SARS.CoV.2_3 
##                              26                              27
###we need to batch adjust based on the cell types
##Series5_A549-1 Series6_A549.ACE2-2 Series7_Calu3-3 Series16_A549.ACE2-4, patients-5
bat <-as.data.frame(cbind(
  colnames(cells_4_BALF_filt),
  c(rep(1, 3),rep(2, 3),rep(3, 3),rep(4, 3),rep(5, 3),rep(1, 3),rep(2, 3),rep(3, 3),rep(4, 3),rep(6, 2), rep(7, 2)),
  c(rep(1, 15), rep(2, 16))))
mod<-model.matrix(~as.factor(bat[,3]), data=bat)
combat_mock_cov<-ComBat(dat = as.matrix(cells_4_BALF_filt,),batch = (bat[,2]),mod=mod,par.prior = T)
## Found7batches
## Adjusting for1covariate(s) or covariate level(s)
## Standardizing Data across genes
## Fitting L/S model and finding priors
## Finding parametric adjustments
## Adjusting the Data
write.csv(combat_mock_cov,"combat_cell56716_BALF.csv")
plot(hclust(dist(t(combat_mock_cov)),method="complete"))

##PCA post combat
pca<-prcomp(t(combat_mock_cov))
plot(pca)

{plot(pca$x[,1],pca$x[,2])
  points(pca$x[1:15,1],pca$x[1:15,2], main="Top 2 PCs",col=2)
  points(pca$x[16:31,1],pca$x[16:31,2], main="Top 2 PCs",col=3)}

which(pca$x[,1]< -50)
## named integer(0)
which(pca$x[,2]< -50)
## named integer(0)
########running assign with the best 25 genes found in the cell line data########
c_mock<-as.matrix(combat_mock_cov[,c(1:12)])
c_cov<-as.matrix(combat_mock_cov[,c(16:27)])
test<-as.matrix(combat_mock_cov[,c(13:15,28:31)])
trainingLabela <- list(control=list(mock=1:12),cov=13:24)
genelist_25<-read.csv("signature_gene_list_prior_25yueli.csv")
basedir<-getwd()
sub_dir <- paste(basedir,paste("cov_BALF", 25, sep=""),sep='/')
dir.create(sub_dir)
set.seed(1220)
assign.wrapper(
  trainingData = cbind(c_mock, c_cov),
  testData = test,
  trainingLabel = trainingLabela,
  geneList = list(genelist_25$X),
  adaptive_B = T,
  adaptive_S = F,
  outputDir = sub_dir,
  p_beta = 0.01,
  theta0 = 0.05,
  theta1 = 0.9,
  iter = 2000,
  burn_in = 1000)
## Performing QC on the input data...
## Warning in assign.preprocess(trainingData, testData, anchorGenes, excludeGenes, : Control Labels DO NOT match the experimental Labels!
## Please make sure that you specify the correct indice for control and experimental samples in the trainingLabel!
## Generating starting/prior values for model parameters...
## Estimating model parameters in the training dataset...
## Start Gibbs sampling...
## | 0%                                  50%                                 100% |
## Estimating model parameters in the test dataset...
## Start Gibbs sampling...
## | 0%                                  50%                                 100% |
## Outputing results...
#PBMC
cell_5_6_7_16_PBMC<-read.table("cell_5_6_7_16_PBMC_norm.txt",head=TRUE)
write.csv(cell_5_6_7_16_PBMC,"cell_5_6_7_16_PBMC.csv")
cells_4_PBMC<-read.csv("cell_5_6_7_16_PBMC.csv",header=TRUE)
cells_4_PBMC<-(log2(data.frame(cells_4_PBMC,check.names = F, row.names=1)+1))
colnames(cells_4_PBMC)
##  [1] "Series5_A549_Mock_1"             "Series5_A549_Mock_2"            
##  [3] "Series5_A549_Mock_3"             "Series6_A549.ACE2_Mock_1"       
##  [5] "Series6_A549.ACE2_Mock_2"        "Series6_A549.ACE2_Mock_3"       
##  [7] "Series7_Calu3_Mock_1"            "Series7_Calu3_Mock_2"           
##  [9] "Series7_Calu3_Mock_3"            "Series16_A549.ACE2_Mock_1"      
## [11] "Series16_A549.ACE2_Mock_2"       "Series16_A549.ACE2_Mock_3"      
## [13] "CRR119890"                       "CRR125445"                      
## [15] "CRR125446"                       "Series5_A549_SARS.CoV.2_1"      
## [17] "Series5_A549_SARS.CoV.2_2"       "Series5_A549_SARS.CoV.2_3"      
## [19] "Series6_A549.ACE2_SARS.CoV.2_1"  "Series6_A549.ACE2_SARS.CoV.2_2" 
## [21] "Series6_A549.ACE2_SARS.CoV.2_3"  "Series7_Calu3_SARS.CoV.2_1"     
## [23] "Series7_Calu3_SARS.CoV.2_2"      "Series7_Calu3_SARS.CoV.2_3"     
## [25] "Series16_A549.ACE2_SARS.CoV.2_1" "Series16_A549.ACE2_SARS.CoV.2_2"
## [27] "Series16_A549.ACE2_SARS.CoV.2_3" "CRR119891"                      
## [29] "CRR119892"                       "CRR119893"
#rownames(cells_4_PBMC)
plot(hclust(dist(t(cells_4_PBMC)),method="complete"))##samples are rather clustering by type than the COV2 infection status

#Series 15 is furthest from the others and that makes sense since these are from patients rather than cell lines. We should exclude Series 15 from the signature generation dataset.

##filter all zeroes
cells_4_PBMC_filt<-(cells_4_PBMC[apply(cells_4_PBMC==0,1,mean)<0.1,])
#precombat PCA
pca<-prcomp(t(cells_4_PBMC_filt))
plot(pca)

{plot(pca$x[,1],pca$x[,2])
  points(pca$x[1:15,1],pca$x[1:15,2],col=2,pch=2)
  points(pca$x[16:30,1],pca$x[16:30,2],col=3,pch=2)
}

which(pca$x[,1]< -100)
## named integer(0)
which(pca$x[,1]< -20)
##             Series5_A549_Mock_1             Series5_A549_Mock_2 
##                               1                               2 
##             Series5_A549_Mock_3        Series6_A549.ACE2_Mock_1 
##                               3                               4 
##        Series6_A549.ACE2_Mock_2        Series6_A549.ACE2_Mock_3 
##                               5                               6 
##            Series7_Calu3_Mock_1            Series7_Calu3_Mock_2 
##                               7                               8 
##            Series7_Calu3_Mock_3       Series16_A549.ACE2_Mock_1 
##                               9                              10 
##       Series16_A549.ACE2_Mock_2       Series16_A549.ACE2_Mock_3 
##                              11                              12 
##       Series5_A549_SARS.CoV.2_1       Series5_A549_SARS.CoV.2_2 
##                              16                              17 
##       Series5_A549_SARS.CoV.2_3  Series6_A549.ACE2_SARS.CoV.2_1 
##                              18                              19 
##  Series6_A549.ACE2_SARS.CoV.2_2  Series6_A549.ACE2_SARS.CoV.2_3 
##                              20                              21 
##      Series7_Calu3_SARS.CoV.2_1      Series7_Calu3_SARS.CoV.2_2 
##                              22                              23 
##      Series7_Calu3_SARS.CoV.2_3 Series16_A549.ACE2_SARS.CoV.2_1 
##                              24                              25 
## Series16_A549.ACE2_SARS.CoV.2_2 Series16_A549.ACE2_SARS.CoV.2_3 
##                              26                              27
which(pca$x[,1]< 0)
##             Series5_A549_Mock_1             Series5_A549_Mock_2 
##                               1                               2 
##             Series5_A549_Mock_3        Series6_A549.ACE2_Mock_1 
##                               3                               4 
##        Series6_A549.ACE2_Mock_2        Series6_A549.ACE2_Mock_3 
##                               5                               6 
##            Series7_Calu3_Mock_1            Series7_Calu3_Mock_2 
##                               7                               8 
##            Series7_Calu3_Mock_3       Series16_A549.ACE2_Mock_1 
##                               9                              10 
##       Series16_A549.ACE2_Mock_2       Series16_A549.ACE2_Mock_3 
##                              11                              12 
##       Series5_A549_SARS.CoV.2_1       Series5_A549_SARS.CoV.2_2 
##                              16                              17 
##       Series5_A549_SARS.CoV.2_3  Series6_A549.ACE2_SARS.CoV.2_1 
##                              18                              19 
##  Series6_A549.ACE2_SARS.CoV.2_2  Series6_A549.ACE2_SARS.CoV.2_3 
##                              20                              21 
##      Series7_Calu3_SARS.CoV.2_1      Series7_Calu3_SARS.CoV.2_2 
##                              22                              23 
##      Series7_Calu3_SARS.CoV.2_3 Series16_A549.ACE2_SARS.CoV.2_1 
##                              24                              25 
## Series16_A549.ACE2_SARS.CoV.2_2 Series16_A549.ACE2_SARS.CoV.2_3 
##                              26                              27
###we need to batch adjust based on the cell types
##Series5_A549-1 Series6_A549.ACE2-2 Series7_Calu3-3 Series16_A549.ACE2-4, patients-5
bat <-as.data.frame(cbind(
  colnames(cells_4_PBMC_filt),
  c(rep(1, 3),rep(2, 3),rep(3, 3),rep(4, 3),rep(5, 3),rep(1, 3),rep(2, 3),rep(3, 3),rep(4, 3),rep(6, 3)),
  c(rep(1, 15), rep(2, 15))))
mod<-model.matrix(~as.factor(bat[,3]), data=bat)
combat_mock_cov<-ComBat(dat = as.matrix(cells_4_PBMC_filt,),batch = (bat[,2]),mod=mod,par.prior = T)
## Found6batches
## Adjusting for1covariate(s) or covariate level(s)
## Standardizing Data across genes
## Fitting L/S model and finding priors
## Finding parametric adjustments
## Adjusting the Data
##PCA post combat
pca<-prcomp(t(combat_mock_cov))
plot(pca)

{plot(pca$x[,1],pca$x[,2])
  points(pca$x[1:15,1],pca$x[1:15,2], main="Top 2 PCs",col=2)
  points(pca$x[16:30,1],pca$x[16:30,2], main="Top 2 PCs",col=3)}

which(pca$x[,1]< -50)
## named integer(0)
which(pca$x[,2]< -50)
## named integer(0)
plot(hclust(dist(t(combat_mock_cov)),method="complete"))

########running assign with the best 25 genes found in the cell line data########
#combat_mock_cov<-read.table("combat_cell56716_PBMC.txt",header=TRUE)
c_mock<-as.matrix(combat_mock_cov[,c(1:12)])
c_cov<-as.matrix(combat_mock_cov[,c(16:27)])
test<-as.matrix(combat_mock_cov[,c(13:15,28:30)])
trainingLabela <- list(control=list(mock=1:12),cov=13:24)
genelist_25<-read.csv("signature_gene_list_prior_25yueli.csv")
basedir<-getwd()
sub_dir <- paste(basedir,paste("cov_PBMC", 25, sep=""),sep='/')
dir.create(sub_dir)
set.seed(1220)
assign.wrapper(
  trainingData = cbind(c_mock, c_cov),
  testData = test,
  trainingLabel = trainingLabela,
  geneList = list(genelist_25$X),
  n_sigGene = 25,
  adaptive_B = T,
  adaptive_S = F,
  outputDir = sub_dir,
  p_beta = 0.01,
  theta0 = 0.05,
  theta1 = 0.9,
  iter = 2000,
  burn_in = 1000)
## Performing QC on the input data...
## Warning in assign.preprocess(trainingData, testData, anchorGenes, excludeGenes, : Control Labels DO NOT match the experimental Labels!
## Please make sure that you specify the correct indice for control and experimental samples in the trainingLabel!
## Generating starting/prior values for model parameters...
## Estimating model parameters in the training dataset...
## Start Gibbs sampling...
## | 0%                                  50%                                 100% |
## Estimating model parameters in the test dataset...
## Start Gibbs sampling...
## | 0%                                  50%                                 100% |
## Outputing results...
#single_cell_integrate
#input, CreateSeuratObject, filter, save
a<-"hc_51_02.csv"
a1<-data.frame(fread(a),check.names=FALSE, row.names=1)
pbmc<- CreateSeuratObject(counts = a1, project = "h1", min.cells = 3, min.features = 200)
pbmc
## An object of class Seurat 
## 15385 features across 11113 samples within 1 assay 
## Active assay: RNA (15385 features, 0 variable features)
pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")
pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
all.genes <- rownames(pbmc)
pbmc <- ScaleData(pbmc, features = all.genes)
## Centering and scaling data matrix
pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc))
## PC_ 1 
## Positive:  IL32, CD2, TRAC, CCL5, LTB, TRBC2, CORO1A, TRBC1, KLRB1, CD3D 
##     CD3E, IFITM1, GZMA, CLEC2D, ACAP1, LIMD2, GIMAP7, LINC01943, LCK, TRAF3IP3 
##     PTPN7, LINC01871, CD3G, IFITM2, ISG20, NKG7, ETS1, CD48, BTG1, CD40LG 
## Negative:  MARCO, C1QB, C1QA, GRN, FTL, C1QC, TYROBP, HLA-DRB5, FCER1G, LGALS3 
##     HLA-DRB1, CD68, CD63, CTSD, SERPINA1, HLA-DQA1, HLA-DRA, FBP1, HLA-DQB1, TSPO 
##     ACP5, S100A11, ANXA2, CTSS, SERPING1, MGST3, PSAP, FABP4, CAPG, S100A9 
## PC_ 2 
## Positive:  APOE, APOC1, FTL, LYZ, CFD, NUPR1, CTSS, PLTP, SAT1, GPNMB 
##     PSAP, LIPA, SOD2, HLA-DPA1, NPC2, HLA-DPB1, LINC01827, CD14, HLA-DMB, TYROBP 
##     PLD3, RARRES1, HLA-DRA, KCNMA1, A2M, ABCA1, BLVRB, AKR1B1, CPVL, GPX3 
## Negative:  TOP2A, BIRC5, CDK1, PCLAF, UBE2C, NUSAP1, MKI67, TYMS, CDKN3, HMMR 
##     PTTG1, GTSE1, CDC20, CCNB2, ASPM, CENPF, PRC1, MAD2L1, SPC25, TK1 
##     RRM2, CENPM, CEP55, TPX2, CKS2, ANLN, SGO1, CCNB1, CENPA, NUF2 
## PC_ 3 
## Positive:  MCEMP1, CD52, INHBA, RGCC, S100A4, MRC1, RETN, GPD1, AQP3, CRIP1 
##     PNPLA6, CALM1, MYL12A, STXBP2, FBP1, IGF1, ISG15, CCND3, HCAR2, LY6E 
##     MYL6, THBS1, RHBDD2, PTPMT1, PPARG, ALOX5AP, GLRX, DECR1, TSPO, LINC02345 
## Negative:  LYZ, APOE, TOP2A, BIRC5, CDK1, UBE2C, NUSAP1, PCLAF, MKI67, HMMR 
##     CDKN3, GTSE1, PLTP, CDC20, TYMS, CENPF, CCNB2, PRC1, APOC1, ASPM 
##     CFD, MAD2L1, MARCKS, CXCR4, NUPR1, CTSS, GPNMB, CD14, NPC2, CEP55 
## PC_ 4 
## Positive:  PKIB, CD1C, FCGR2B, FCER1A, C15orf48, MARCKS, ACTB, LGALS2, CLEC10A, CD1E 
##     ACTG1, MEF2C, BASP1, GSN, EMP3, FPR3, EMP1, S100A10, LMNA, FGL2 
##     CCL17, RNASE6, PFN1, RNASE1, CD1A, CRIP1, MYADM, PEA15, TAGLN2, AP1S2 
## Negative:  CSTA, CFD, FABP4, LYZ, NUPR1, FTL, SERPING1, IGF1, CTSC, APOC1 
##     VMO1, PLAC8, RBP4, ALDH2, LINC01827, C1QA, STMN1, AKR1B1, C1QB, SPARC 
##     NUP214, MYB, C9orf16, LY86, KCNAB1, ABHD5, SERPINA1, MT2A, MARCO, LINC02154 
## PC_ 5 
## Positive:  OTOA, CCL18, CTSL, GCHFR, TFRC, IL32, FDX1, GPNMB, CSTB, SDC2 
##     LIPA, SCD, CD2, RMDN3, HES2, APOC1, CA2, CCL5, TRAC, FABP3 
##     CD52, CD36, LGMN, CYP27A1, NR1H3, CD99, PLIN2, FN1, TRBC2, CTSB 
## Negative:  FCER1A, CD1C, PKIB, CD1E, MNDA, LGALS2, CLEC10A, MEF2C, HLA-DPB1, FCGR2B 
##     FGL2, MS4A6A, HLA-DPA1, PLAC8, HLA-DQB1, HLA-DRA, IFITM3, CCL17, CD1A, HLA-DRB1 
##     IGF1, THBS1, HLA-DQA1, CSTA, CLEC7A, LYZ, HLA-DRB5, CLEC5A, TYROBP, S100B
pbmc <- RunUMAP(pbmc, dims = 1:10)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 06:26:44 UMAP embedding parameters a = 0.9922 b = 1.112
## 06:26:44 Read 8923 rows and found 10 numeric columns
## 06:26:44 Using Annoy for neighbor search, n_neighbors = 30
## 06:26:44 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 06:26:47 Writing NN index file to temp file /tmp/Rtmp07H6lD/file38b14e3782a3
## 06:26:47 Searching Annoy index using 1 thread, search_k = 3000
## 06:26:50 Annoy recall = 100%
## 06:26:51 Commencing smooth kNN distance calibration using 1 thread
## 06:26:53 Initializing from normalized Laplacian + noise
## 06:26:54 Commencing optimization for 500 epochs, with 365236 positive edges
## 06:27:16 Optimization finished
saveRDS(pbmc, file = "hc51.rds")

a<-"hc_52_02.csv"
a1<-data.frame(fread(a),check.names=FALSE, row.names=1)
pbmc<- CreateSeuratObject(counts = a1, project = "h2", min.cells = 3, min.features = 200)
pbmc
## An object of class Seurat 
## 14748 features across 10361 samples within 1 assay 
## Active assay: RNA (14748 features, 0 variable features)
pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")
pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
all.genes <- rownames(pbmc)
pbmc <- ScaleData(pbmc, features = all.genes)
## Centering and scaling data matrix
pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc))
## PC_ 1 
## Positive:  C1QB, C1QC, ANXA2, S100A11, CD52, ACTB, CFL1, MYL6, C1QA, CD63 
##     SERF2, FBP1, RETN, TFRC, CSTB, GRN, NOP10, PFN1, S100A10, MCEMP1 
##     TSPO, PRDX1, TMSB10, IFI6, IGFBP2, MARCO, TGM2, HSP90AA1, LGALS3, MSR1 
## Negative:  MALAT1, LYZ, CXCR4, CFD, BTG1, C3, RGS2, MT2A, SNHG7, MEF2C 
##     IL32, LINC01827, EGR1, LTB, RGS18, CD2, CORO1A, FOS, CLDND2, SNHG12 
##     TRAC, LGALS2, RCSD1, CD1D, CLEC4E, GPR155, LTC4S, MT1X, WFDC2, AGR3 
## PC_ 2 
## Positive:  FTL, APOE, APOC1, GPNMB, LGALS1, OTOA, CSTB, LIPA, CTSD, PSAP 
##     NPC2, RARRES1, PLA2G7, PLD3, FTH1, MARCKS, A2M, CD36, CTSB, HLA-A 
##     BLVRB, CD14, CD48, HLA-B, CFD, SGK1, LGMN, ABCA1, SCD, SDC2 
## Negative:  MCEMP1, RETN, INHBA, ALOX5AP, C1QA, PLAC8, MRC1, S100A4, CRIP1, STXBP2 
##     HP, IGF1, C1QB, TSPO, TCF7L2, FBP1, PNPLA6, RGCC, CD52, IGFBP2 
##     C1QC, TMSB4X, PARAL1, MYL6, S100A9, CYSLTR1, LY6E, HLA-DRA, CYBB, FOLR3 
## PC_ 3 
## Positive:  TMSB4X, TYROBP, FTH1, FTL, CD74, LYZ, HLA-DRA, APOC1, APOE, CFD 
##     C1QB, B2M, HLA-DQA1, C1QA, GRN, CTSD, NUPR1, HCST, PSAP, SRGN 
##     FABP4, LGALS1, ALOX5AP, NPC2, TMSB10, VIM, S100A4, SERPING1, MARCO, CD68 
## Negative:  SLPI, GSTA1, SMIM22, TSPAN1, AGR3, CD24, FXYD3, TFF3, C9orf24, IGFBP7 
##     MS4A8, CAPS, WFDC2, PERP, ANKRD44-AS1, RSPH1, CLU, FAM183A, KRT8, LRRIQ1 
##     TMEM190, SNTN, NQO1, C1orf194, SPATA18, DYNLRB2, ECRG4, C20orf85, C12orf75, C11orf88 
## PC_ 4 
## Positive:  TYROBP, FTL, APOC1, FTH1, LYZ, CD74, HLA-DRA, CFD, FABP4, C1QB 
##     NUPR1, TMSB4X, B2M, C1QA, APOE, RBP4, HLA-DQA1, SERPING1, MARCO, MT2A 
##     CES1, LINC01827, TREM1, GRN, HCST, GCHFR, NPC2, PSAP, HLA-B, GLUL 
## Negative:  MARCKS, PLA2G7, EMP1, SDS, CCL4, SPP1, C15orf48, SNCA, CHI3L1, ACTG1 
##     CCL3, CCL4L2, ACTB, CCL2, CHIT1, FCGR2B, CORO1A, FNIP2, LGMN, RGCC 
##     MALAT1, MTHFD2, INHBA, CCL20, TNFAIP6, GBP1, SLAMF7, GDF15, SOD2, TNIP3 
## PC_ 5 
## Positive:  LGALS3, CD52, CSTB, CD9, ACP5, FDX1, GCHFR, HES2, GDF15, OTOA 
##     PPDPF, PLA2G7, CD63, CA2, CHIT1, RGCC, TUBA1B, GPD1, FABP3, FTL 
##     HEXB, CD68, RBP4, TXN, RETREG1, CHCHD6, DBI, CYP27A1, NOP10, HS3ST2 
## Negative:  CCL4, CCL4L2, LYZ, TNFAIP6, HLA-DRA, CCL3, CXCL10, CD74, TNIP3, CCL20 
##     SOD2, CCL3L1, TMSB4X, FCGR3A, SRGN, GBP1, S100A8, SAT1, MARCKS, GCH1 
##     GBP5, PLEK, FOS, MT2A, B2M, MT-CO1, CXCL3, VAMP5, EGR1, CXCL11
pbmc <- RunUMAP(pbmc, dims = 1:10)
## 06:30:49 UMAP embedding parameters a = 0.9922 b = 1.112
## 06:30:49 Read 7533 rows and found 10 numeric columns
## 06:30:49 Using Annoy for neighbor search, n_neighbors = 30
## 06:30:49 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 06:30:51 Writing NN index file to temp file /tmp/Rtmp07H6lD/file38b163c8aac1
## 06:30:51 Searching Annoy index using 1 thread, search_k = 3000
## 06:30:53 Annoy recall = 100%
## 06:30:54 Commencing smooth kNN distance calibration using 1 thread
## 06:30:57 Initializing from normalized Laplacian + noise
## 06:30:57 Commencing optimization for 500 epochs, with 307648 positive edges
## 06:31:17 Optimization finished
saveRDS(pbmc, file = "hc52.rds")

a<-"hc_100_02.csv"
a1<-data.frame(fread(a),check.names=FALSE, row.names=1)
pbmc<- CreateSeuratObject(counts = a1, project = "h3", min.cells = 3, min.features = 200)
pbmc
## An object of class Seurat 
## 16315 features across 6578 samples within 1 assay 
## Active assay: RNA (16315 features, 0 variable features)
pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")
pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
all.genes <- rownames(pbmc)
pbmc <- ScaleData(pbmc, features = all.genes)
## Centering and scaling data matrix
pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc))
## PC_ 1 
## Positive:  KRT78, LCN2, KRT19, ELF3, MUC21, PRSS22, ECM1, MAL, SPRR1B, KRT13 
##     SPRR3, LYPD2, SPRR2D, TMPRSS11B, PITX1, PPL, SPRR2A, EMP1, CEACAM6, FAM25A 
##     GPRC5A, SCEL, RHCG, EPS8L1, CD3E, DUSP5, IL32, TMPRSS11E, SPINK5, CDKN2B 
## Negative:  FTL, HLA-DRA, LYZ, TYROBP, HLA-DRB1, APOC1, CD74, C1QA, CST3, FCER1G 
##     APOE, C1QB, HLA-DQB1, HLA-DPA1, CTSD, CD68, GRN, ACP5, FBP1, FTH1 
##     HLA-DPB1, PSAP, MARCO, TSPO, FCGRT, FABP4, C1QC, CD9, CEBPB, MCEMP1 
## PC_ 2 
## Positive:  KRT78, LCN2, MUC21, ELF3, ECM1, KRT19, PRSS22, SPRR1B, KRT13, SPRR3 
##     LYPD2, SPRR2D, PITX1, EPS8L1, TMPRSS11B, EMP1, PPL, FTH1, S100A9, CEACAM6 
##     FAM25A, SPRR2A, GPRC5A, SCEL, NIBAN2, RHCG, TMPRSS11E, IL1RN, LGALS3, SPINK5 
## Negative:  CD3E, IL32, TRAC, CD2, CD3D, ETS1, CD3G, LTB, TRBC2, CORO1A 
##     GIMAP7, LIMD2, KLRB1, TRBC1, ISG20, IL7R, CD247, ACAP1, CCL5, RORA 
##     SYNE2, CD7, TRAF3IP3, GZMA, BCL11B, CD69, GPR183, MT-ND3, DGKA, RIPOR2 
## PC_ 3 
## Positive:  SLC11A1, TNFAIP2, NEAT1, TFRC, TAGLN, ITGAX, VMP1, DDHD1, INHBA, LRP1 
##     ZNF589, DMXL2, MYO6, CHML, UVSSA, CIITA, SIGLEC1, ZMAT1, CTNNB1, PKN2 
##     B3GNT5, SLC12A6, PER1, ACE, FOS, SLFN11, PTK2B, CXCL3, DUSP1, CLN8 
## Negative:  ACTB, ACTG1, MYL12A, S100A6, S100A11, SSR4, HLA-DPB1, HCST, SRGN, CD63 
##     LGALS1, FCER1G, CST3, NPC2, HLA-DPA1, C1QB, S100A9, PPDPF, HLA-DQA2, CSTB 
##     PPIB, ZFAS1, SEC61B, MGST3, NDUFB11, SUB1, EVI2B, TYROBP, CORO1A, GCHFR 
## PC_ 4 
## Positive:  SPINK5, PSCA, TMPRSS11E, KRT6A, SPRR2E, S100A14, LYPD2, SPRR3, KRT13, S100A9 
##     MAL, CNFN, FABP4, C19orf33, SPRR2A, SPRR2D, LCN2, KRT78, KRT19, SPRR1B 
##     PPL, S100A8, PRSS22, CD177, S100P, APOC1, AIF1L, ELF3, GCHFR, TGM3 
## Negative:  TAMALIN, SNCA, FPR3, MEF2C, BCL11A, MT-ND3, FCGR2B, MAP3K20, TRAF4, SNX9 
##     TCF4, P2RY14, CLTC, MS4A6A, MT-ND1, PMP22, IL13RA1, METRNL, F13A1, CLDN1 
##     STAB1, CSF1R, MT-ND5, PCBP1, AHNAK, SPP1, MT-ND2, CLEC4F, NLRP3, ZNF503 
## PC_ 5 
## Positive:  FABP4, MARCO, MGST3, CTSD, MCEMP1, CES1, TRAF4, APOC1, PLIN2, APOE 
##     FRMD8, TSPO, GZMB, FABP5, SLC3A2, MSR1, CEBPB, NUPR1, FBXO32, GNG12 
##     FBP1, RETN, ZNF185, GNE, ATP1B1, CRYM, MGST1, SCD, CA13, SSR4 
## Negative:  FGL2, LGALS2, CLEC10A, FCER1A, CD1C, CD1E, CLEC4F, KRT6A, SPRR2E, NDRG2 
##     LTC4S, S100B, MS4A6A, F13A1, SPINK5, PSCA, CD1A, CFP, CLDN1, S100A14 
##     PKIB, FCGR2B, CCL17, HLA-DQA1, CSF1R, TMPRSS11E, CNFN, AFDN, HSPA6, FPR3
pbmc <- RunUMAP(pbmc, dims = 1:10)
## 06:32:28 UMAP embedding parameters a = 0.9922 b = 1.112
## 06:32:28 Read 303 rows and found 10 numeric columns
## 06:32:28 Using Annoy for neighbor search, n_neighbors = 30
## 06:32:28 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 06:32:28 Writing NN index file to temp file /tmp/Rtmp07H6lD/file38b140dd96e9
## 06:32:28 Searching Annoy index using 1 thread, search_k = 3000
## 06:32:28 Annoy recall = 100%
## 06:32:29 Commencing smooth kNN distance calibration using 1 thread
## 06:32:29 Initializing from normalized Laplacian + noise
## 06:32:29 Commencing optimization for 500 epochs, with 11370 positive edges
## 06:32:30 Optimization finished
saveRDS(pbmc, file = "hc100.rds")

a<-"mild_141_02.csv"
a1<-data.frame(fread(a),check.names=FALSE, row.names=1)
pbmc<- CreateSeuratObject(counts = a1, project = "m1", min.cells = 3, min.features = 200)
pbmc
## An object of class Seurat 
## 17670 features across 4233 samples within 1 assay 
## Active assay: RNA (17670 features, 0 variable features)
pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")
pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
all.genes <- rownames(pbmc)
pbmc <- ScaleData(pbmc, features = all.genes)
## Centering and scaling data matrix
pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc))
## PC_ 1 
## Positive:  CST3, FTL, IFI30, FTH1, TYROBP, CD68, FCER1G, CXCL10, AIF1, LYZ 
##     GLUL, MNDA, GRN, SERPINA1, HLA-DRA, CTSL, MS4A6A, SERPING1, SPI1, APOBEC3A 
##     MAFB, APOC1, MS4A4A, ATOX1, FCGR1A, C1QC, C1QB, NUPR1, HLA-DRB5, FCGR3A 
## Negative:  IL32, CCL5, CD52, GIMAP7, CD7, GZMA, CD8A, TRBC2, NKG7, PRF1 
##     CD8B, CST7, CTSW, SPOCK2, BTG1, LAG3, IL2RB, CXCR3, ITGA4, HOPX 
##     SH2D2A, GNG2, LBH, FASLG, CD6, AHNAK, CD69, CXCR6, LINC00861, GZMB 
## PC_ 2 
## Positive:  WFDC2, SLPI, LCN2, KRT19, FXYD3, CXCL17, AGR2, PIGR, SCGB3A1, S100P 
##     TSPAN1, GSTA1, SCGB1A1, SERPINB3, ADH1C, CYP2F1, TFF3, BPIFB1, KRT18, PRSS23 
##     S100A14, CEACAM6, C19orf33, MUC5B, MUC5AC, LYPD2, MSMB, ASS1, AKR1C1, AQP5 
## Negative:  MX1, RSAD2, ISG20, IRF7, GBP1, KLF6, ISG15, STAT1, SRGN, DDX58 
##     IFIH1, GBP4, GBP5, MT-CO1, TNFSF10, IFIT3, STAT2, CD69, IFIT1, TYMP 
##     GIMAP7, CD38, NUB1, MT-CO2, S100A10, IL32, RGS1, SOCS1, APOBEC3G, IFIT2 
## PC_ 3 
## Positive:  APOC1, NUPR1, MARCO, S100A4, VIM, FTL, FABP4, MCEMP1, APOE, C1QB 
##     C1QC, C1QA, RBP4, CAPG, S100A8, CYP27A1, STXBP2, PARAL1, SNX10, PELATON 
##     NCF2, GCHFR, AIF1, S100A11, S100A9, CES1, SERPINA1, MGST3, S100A10, PPARG 
## Negative:  CCR7, TSPAN13, IGKC, NR4A3, MEF2C, IDO1, BASP1, IRF8, TCF4, CD83 
##     LAD1, BCL11A, WDFY4, LRRK1, LAMP3, ETV6, FSCN1, NRP2, BIRC3, TBC1D8 
##     MARCKSL1, RGS1, NEURL3, DUSP5, CERS6, NAPSA, SLC41A2, NFKB1, CD79A, IGFBP4 
## PC_ 4 
## Positive:  MS4A1, CD79A, IGKC, HLA-DQA1, MEF2C, BANK1, FCRLA, NAPSA, HLA-DQB1, CD19 
##     LINC02397, TCF4, HLA-DQA2, CPNE5, GAPT, PLD4, RHEX, HLA-DRA, JCHAIN, BLNK 
##     BCL11A, DERL3, HLA-DPB1, PLAC8, IGHA1, FCRL5, IGHM, IRF8, HLA-DPA1, PAX5 
## Negative:  S100A6, WFDC2, S100P, LCN2, IFI6, SLPI, KRT19, RSAD2, AGR2, PIGR 
##     GBP1, KLF6, CEACAM6, CXCL17, LYPD2, CYP2F1, ADH1C, FXYD3, ASS1, SCGB3A1 
##     PRSS23, GBP5, S100A14, LAG3, TSPAN1, S100A4, MDK, S100A10, KRT18, C19orf33 
## PC_ 5 
## Positive:  MT2A, TNFSF10, IFIT3, IFIT1, IFIT2, ISG15, IFI6, ICAM2, MX1, GBP1 
##     GBP4, RIPOR2, LTB, DDX58, STAT1, SAT1, ISG20, SELL, RSAD2, IRF7 
##     SOCS1, ICOS, CD28, IFIH1, TMSB10, MAL, CTLA4, TESPA1, TNFRSF4, BCL2L14 
## Negative:  HOPX, ZNF683, CD8A, KLRC1, CXCR6, CD63, KLRD1, ITGAE, PLEKHF1, CCL5 
##     NKG7, CTSW, GPR25, LINC02446, TRG-AS1, ITGA1, CD8B, CD244, FCRL6, AOAH 
##     CCL4, KIR2DL4, KLRC3, CST7, ID2, CD7, TRGV7, KIR3DL2, MATK, SLAMF7
pbmc <- RunUMAP(pbmc, dims = 1:10)
## 06:32:39 UMAP embedding parameters a = 0.9922 b = 1.112
## 06:32:39 Read 1366 rows and found 10 numeric columns
## 06:32:39 Using Annoy for neighbor search, n_neighbors = 30
## 06:32:39 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 06:32:39 Writing NN index file to temp file /tmp/Rtmp07H6lD/file38b1430d8258
## 06:32:39 Searching Annoy index using 1 thread, search_k = 3000
## 06:32:39 Annoy recall = 100%
## 06:32:40 Commencing smooth kNN distance calibration using 1 thread
## 06:32:40 Initializing from normalized Laplacian + noise
## 06:32:40 Commencing optimization for 500 epochs, with 50582 positive edges
## 06:32:42 Optimization finished
saveRDS(pbmc, file = "mild141.rds")


a<-"mild_142_02.csv"
a1<-data.frame(fread(a),check.names=FALSE, row.names=1)
pbmc<- CreateSeuratObject(counts = a1, project = "m2", min.cells = 3, min.features = 200)
pbmc
## An object of class Seurat 
## 17512 features across 4559 samples within 1 assay 
## Active assay: RNA (17512 features, 0 variable features)
pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")
pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
all.genes <- rownames(pbmc)
pbmc <- ScaleData(pbmc, features = all.genes)
## Centering and scaling data matrix
pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc))
## PC_ 1 
## Positive:  FTL, FTH1, TYROBP, FCER1G, CST3, CD68, IFI30, AIF1, APOC1, GLUL 
##     GRN, SPI1, MNDA, S100A11, NUPR1, HLA-DRA, CTSL, CXCL10, C1QA, C1QC 
##     APOE, MSR1, MS4A4A, LYZ, C1QB, HLA-DQA1, SERPINA1, LGALS3, S100A9, HLA-DQB1 
## Negative:  IL32, CCL5, CD7, IFITM1, GZMA, MALAT1, PRF1, CST7, NKG7, TRBC2 
##     CD8A, CD52, CD8B, CXCR3, CTSW, LAG3, IL2RB, SH2D2A, LINC01871, FASLG 
##     ISG20, LINC00861, GZMB, GZMH, SAMD3, CD69, ID2, TRBC1, CXCR6, GZMK 
## PC_ 2 
## Positive:  MALAT1, RSAD2, NEAT1, MX1, PARP14, IFITM1, SIGLEC1, TNFSF10, ISG20, MARCKS 
##     DDX58, KLF6, IFIH1, ADA2, GBP1, CCL2, CCL8, PLA2G7, SDS, FNIP2 
##     RNASE1, SRGN, IRF7, LILRB2, TTYH3, FCN1, CCR1, LGMN, STAT2, VMP1 
## Negative:  FABP4, NUPR1, FTL, FTH1, APOC1, SLPI, ALDH2, TFF3, SCGB1A1, WFDC2 
##     CXCL17, MSMB, KRT19, LCN2, GSTA1, C1QA, SCGB3A1, SERPINB3, CYP2F1, C1QB 
##     BPIFB1, MCEMP1, MUC5B, FBP1, S100A9, CYB5A, ALDH1A1, AGR2, PRDX1, CCL18 
## PC_ 3 
## Positive:  KRT19, LCN2, SLPI, CXCL17, S100P, S100A14, TFF3, SCGB1A1, MSMB, WFDC2 
##     CYP2F1, SCGB3A1, SERPINB3, MUC5B, BPIFB1, LYPD2, AGR2, C19orf33, PIGR, KRT7 
##     GSTA1, AKR1C1, PRSS23, FXYD3, MSLN, STEAP4, ADH1C, ADIRF, RAB25, ELF3 
## Negative:  VIM, APOC1, FTL, FABP4, NUPR1, C1QA, C1QB, ACTB, SPI1, C1QC 
##     LGALS1, FTH1, AIF1, CAPG, C1orf162, MARCO, MCEMP1, TYROBP, CCL18, CXCL16 
##     SNX10, FABP5, GLRX, FBP1, CD52, BCL2A1, ALDH2, APOE, NCF2, CD68 
## PC_ 4 
## Positive:  SCGB1A1, WFDC2, TFF3, SCGB3A1, MSMB, CYP2F1, MUC5B, BPIFB1, SLPI, CXCL17 
##     SERPINB3, AGR2, PIGR, AKR1C1, PRSS23, GSTA1, ADH1C, FXYD3, MUC5AC, C3 
##     TSPAN8, LY6D, TMEM45A, CREB3L1, TSPAN1, SERPINB11, AQP5, PART1, KRT8, KRT18 
## Negative:  NCCRP1, KRT16, SPRR3, SDCBP2, KRT13, ECM1, NECTIN4, PRSS27, RND3, SPNS2 
##     TMPRSS11E, KRT78, RNF39, TGM3, HSPB8, CRISP3, TMPRSS11B, SCEL, MUC21, CYSRT1 
##     FAM25A, AIF1L, GPRC5A, PADI1, EPS8L1, NR1D1, GRHL1, TRIP10, ATG9B, PPL 
## PC_ 5 
## Positive:  ZNF683, CCL5, CCL4, NKG7, CD63, CD8A, GZMH, CTSW, ITGA1, LINC02446 
##     CD7, CXCR6, KLRD1, CD8B, KLRC1, PLEKHF1, ITGAE, PRF1, CCL4L2, GPR25 
##     GZMA, HOPX, TRGV2, FCRL6, CST7, GZMB, CD244, IL2RB, MATK, CTSD 
## Negative:  BCL11A, IGKC, CCR7, SELL, PLD4, JCHAIN, TCF4, MS4A1, CD79A, BANK1 
##     SPIB, LINC02397, PLAC8, IGHM, NAPSA, CPNE5, TNFSF10, COBLL1, TSPAN13, P2RY14 
##     SCT, CD24, RHEX, MEF2C, LTB, EAF2, IRF8, IFIT3, CD19, RIPOR2
pbmc <- RunUMAP(pbmc, dims = 1:10)
## 06:33:01 UMAP embedding parameters a = 0.9922 b = 1.112
## 06:33:01 Read 1694 rows and found 10 numeric columns
## 06:33:01 Using Annoy for neighbor search, n_neighbors = 30
## 06:33:01 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 06:33:01 Writing NN index file to temp file /tmp/Rtmp07H6lD/file38b16718c470
## 06:33:01 Searching Annoy index using 1 thread, search_k = 3000
## 06:33:01 Annoy recall = 100%
## 06:33:01 Commencing smooth kNN distance calibration using 1 thread
## 06:33:02 Initializing from normalized Laplacian + noise
## 06:33:02 Commencing optimization for 500 epochs, with 63580 positive edges
## 06:33:04 Optimization finished
saveRDS(pbmc, file = "mild142.rds")

a<-"mild_144_02.csv"
a1<-data.frame(fread(a),check.names=FALSE, row.names=1)
pbmc<- CreateSeuratObject(counts = a1, project = "m3", min.cells = 3, min.features = 200)
pbmc
## An object of class Seurat 
## 15374 features across 913 samples within 1 assay 
## Active assay: RNA (15374 features, 0 variable features)
pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")
pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
all.genes <- rownames(pbmc)
pbmc <- ScaleData(pbmc, features = all.genes)
## Centering and scaling data matrix
pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc))
## PC_ 1 
## Positive:  CTSL, SGK1, APOBEC3A, LILRB2, IL4I1, SIGLEC1, OLR1, LILRB3, C15orf48, SDS 
##     IL1RN, SLC11A1, PLAUR, FN1, SLC43A2, NEAT1, LGMN, EMP1, CTSD, CMKLR1 
##     SERPINA1, SIRPA, CCL2, FCN1, GRINA, LILRB4, DEFB1, CD83, CCL8, DUSP1 
## Negative:  SCGB3A1, MSMB, SLPI, SCGB1A1, WFDC2, TFF3, CXCL17, KRT19, BPIFB1, CCL5 
##     MUC5B, AGR2, SERPINB3, PIGR, MT1E, LCN2, S100P, KRT18, CYP2F1, AKR1C1 
##     GSTA1, HOPX, NKG7, CD2, C19orf33, MUC5AC, LYPD2, ZNF683, CXCL6, PSCA 
## PC_ 2 
## Positive:  CST3, FTH1, HLA-DRA, IFI27, FTL, HLA-DQA1, LYZ, TYROBP, SLPI, AIF1 
##     C1QC, CD74, HLA-DQB1, SCGB1A1, C1QB, WFDC2, MSMB, C1QA, TFF3, SPI1 
##     CXCL17, HLA-DRB5, NPC2, HLA-DPA1, SCGB3A1, KRT19, BPIFB1, HLA-DPB1, FCER1G, VMO1 
## Negative:  CCL5, CD7, PRF1, MALAT1, CD2, NKG7, CD3G, CTSW, GZMB, LAG3 
##     GZMA, CD8A, KLRC1, AHNAK, ALOX5AP, CST7, CD8B, CLEC2D, PTPN22, IFITM1 
##     CXCR6, SLFN5, GZMH, CBLB, SH2D2A, IKZF3, SPOCK2, CCL4, CD44, ITGAE 
## PC_ 3 
## Positive:  DRC3, DNAH9, LDLRAD1, CCDC189, CCDC33, C4orf47, FAM166B, CCDC78, DNAH10, TSNAXIP1 
##     WDR38, SPAG17, RSPH1, ELF3, CCDC170, DNALI1, CAPS, DNAH12, CCDC173, CFAP43 
##     LRRC23, CFAP157, CCDC191, EPHX2, RPGRIP1L, AGBL4, EFHC1, MAP3K19, TXLNB, DNAI2 
## Negative:  FTL, HLA-DPB1, HLA-DPA1, TYROBP, AIF1, HLA-DRB1, HLA-DQB1, C1QC, C1QA, SPI1 
##     C1QB, HLA-DQA1, FCER1G, HLA-DRB5, TUBA1B, MS4A6A, HLA-DRA, CST3, IFI30, NPC2 
##     CALHM6, CD68, LYZ, LGALS1, MNDA, LGALS2, VIM, CD74, PSAP, FGL2 
## PC_ 4 
## Positive:  HLA-DPB1, HLA-DPA1, C1QC, HLA-DQA1, C1QA, C1QB, HLA-DRA, AIF1, SPI1, TYROBP 
##     HLA-DQB1, HLA-DRB1, CALHM6, CD74, HLA-DRB5, MS4A6A, LGALS2, FCER1G, FTL, DNAH12 
##     DRC3, CCDC189, SPAG17, TSNAXIP1, CCDC78, C4orf47, DNAH9, SLC40A1, LDLRAD1, CCDC173 
## Negative:  WFDC2, MSMB, SCGB1A1, TFF3, CXCL17, SLPI, S100P, LCN2, BPIFB1, SCGB3A1 
##     KRT19, AKR1C1, CYP2F1, AGR2, SERPINB3, MDK, KRT18, MUC5B, S100A6, PRSS23 
##     C19orf33, LYPD2, CEACAM6, MUC5AC, PIGR, KRT7, PSCA, S100A14, CXCL6, FXYD3 
## PC_ 5 
## Positive:  IGFBP7, C9orf24, C20orf85, VEGFA, NR4A3, HAPLN3, SNTN, C1orf194, RGS1, CD38 
##     PML, ETV6, CFAP43, CAPSL, PIFO, CREM, SAXO2, TMEM190, AREG, MORN2 
##     SCIN, IGFBP2, C9orf135, ISG15, FAM229B, PPP1R42, NT5C3A, PITPNA, DUSP2, IDO1 
## Negative:  RND3, ULK1, GLB1L, LIMS2, MAN2C1, MNT, PTPRM, MARCO, ZNF469, PDK4 
##     PLD3, ADCK5, PLIN2, SEPTIN10, APOC1, H2BC8, CDH23, GPNMB, AHI1, PTPN13 
##     PPP2R3B, H3C7, CASP5, SLC3A2, TIMP2, RRP12, DPY19L2, DNAAF1, NR1H3, PITPNM1
pbmc <- RunUMAP(pbmc, dims = 1:10)
## 06:33:19 UMAP embedding parameters a = 0.9922 b = 1.112
## 06:33:19 Read 375 rows and found 10 numeric columns
## 06:33:19 Using Annoy for neighbor search, n_neighbors = 30
## 06:33:19 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 06:33:19 Writing NN index file to temp file /tmp/Rtmp07H6lD/file38b13469270d
## 06:33:19 Searching Annoy index using 1 thread, search_k = 3000
## 06:33:19 Annoy recall = 100%
## 06:33:19 Commencing smooth kNN distance calibration using 1 thread
## 06:33:20 Initializing from normalized Laplacian + noise
## 06:33:20 Commencing optimization for 500 epochs, with 13450 positive edges
## 06:33:20 Optimization finished
saveRDS(pbmc, file = "mild144.rds")

a<-"severe_143_02.csv"
a1<-data.frame(fread(a),check.names=FALSE, row.names=1)
pbmc<- CreateSeuratObject(counts = a1, project = "s1", min.cells = 3, min.features = 200)
pbmc
## An object of class Seurat 
## 18344 features across 20286 samples within 1 assay 
## Active assay: RNA (18344 features, 0 variable features)
pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")
pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
all.genes <- rownames(pbmc)
pbmc <- ScaleData(pbmc, features = all.genes)
## Centering and scaling data matrix
pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc))
## PC_ 1 
## Positive:  CD2, IL32, CD3E, PTPRCAP, CD7, GZMA, CD3D, CCL5, CD247, TRBC2 
##     LCK, OCIAD2, CD96, PRF1, LIMD2, LDHB, SPOCK2, GZMB, SEPTIN1, SH2D2A 
##     IL2RB, LTB, SYNE2, ETS1, CLEC2D, RPSA, CD3G, ZAP70, MZT2A, RPS4X 
## Negative:  CTSL, CCL2, CST3, CTSB, CTSD, IL1RN, APOBEC3A, S100A9, S100A8, PLAUR 
##     LGALS3, MAFB, SGK1, LYZ, FCN1, TIMP1, CCL3, CXCL10, CSTB, HLA-DRA 
##     IGSF6, S100A6, DEFB1, LILRB4, CCL8, IDO1, SLC11A1, CALHM6, ATF5, CD163 
## PC_ 2 
## Positive:  HCAR3, FCGR3B, HCAR2, PLAU, TNFAIP6, G0S2, CCL3L1, CXCL8, CCL4L2, PI3 
##     HES4, CXCR4, VEGFA, CARD17, RSAD2, CLEC4E, RGL4, PPIF, SLPI, S100P 
##     FFAR2, OSM, IRAK2, CMTM2, S100A8, NFKBIA, KATNBL1, DDIT3, SELL, GADD45B 
## Negative:  CD74, RPS27, HLA-DPA1, CTSB, HLA-DRB1, CST3, HLA-DPB1, RPS18, CTSL, PPDPF 
##     RPS15A, HLA-DRA, RPS6, CALR, RPS3, CTSZ, HLA-DRB5, RPS5, FABP5, HSPA8 
##     RPS4X, CTSC, HSP90AB1, RPLP0, RPL3, GSTP1, ACP5, LILRB4, HSPB1, CHCHD10 
## PC_ 3 
## Positive:  ELF3, KRT8, KRT18, KRT19, CLDN4, WFDC2, CAPS, C9orf24, C20orf85, FAM183A 
##     SMIM22, TACSTD2, TMEM190, CLDN3, MUC4, TPPP3, FXYD3, CCDC78, CXCL17, CLDN7 
##     C1orf194, RSPH1, C9orf116, ZMYND10, TSPAN1, DNALI1, NUPR1, DYNLRB2, CD24, LCN2 
## Negative:  S100A4, CTSC, TNFSF10, APOBEC3A, CORO1A, CTSL, CD52, CCL2, FCN1, CTSB 
##     PTPRE, LYZ, EVL, GIMAP7, CXCL10, CCL3, MAFB, DUSP6, CD2, ATF5 
##     IL1RN, GZMA, NCF1, RIN2, NKG7, LILRA5, DEFB1, CD7, IL4I1, CCL8 
## PC_ 4 
## Positive:  TNFSF10, DUSP6, APOBEC3A, FCN1, LILRA5, WARS1, IFIT1, ACTG1, RIN2, PTPRE 
##     IL1RN, VCAN, NCF1, MAFB, S100A12, ATF5, CLU, SERPINB1, CXCL11, S100A4 
##     S100A9, SERPINB9, IFIT2, S100A8, CXCL10, PLAC8, LYZ, OASL, ACOD1, CD300E 
## Negative:  APOE, SERPINH1, HSPB1, HSPA6, C1QC, C1QA, C1QB, DNAJB1, APOC1, BAG3 
##     FKBP4, CCL18, HSPH1, DNAJA4, HSPA1B, HSPD1, GPNMB, HSPA1A, IER5, HSPE1 
##     ZFAND2A, CACYBP, G0S2, TCEAL9, CRYAB, PLAU, LGMN, GADD45B, SCD, NRP2 
## PC_ 5 
## Positive:  DNAJB1, HSPA1A, HSPA1B, HSPA6, HSPH1, BAG3, HSP90AA1, DNAJA4, HSPD1, HSPB1 
##     FKBP4, ZFAND2A, CACYBP, HSP90AB1, SERPINH1, HSPE1, HSPA8, IER5, OLIG1, S100A12 
##     FCN1, S100A9, UBB, CRYAB, NR4A1, S100A8, ENGASE, DNAJB4, TENT5A, MRPL18 
## Negative:  C1QC, C1QB, C1QA, APOE, APOC1, GPNMB, NR1H3, LGMN, HLA-DQA1, CCL18 
##     PLD3, FPR3, A2M, MSR1, TREM2, SCD, KCNMA1, NUPR1, GCHFR, MMP14 
##     PLTP, VSIG4, TSPAN4, MGLL, CD276, FN1, PLA2G7, GM2A, NRP2, VAT1
pbmc <- RunUMAP(pbmc, dims = 1:10)
## 06:33:56 UMAP embedding parameters a = 0.9922 b = 1.112
## 06:33:56 Read 12840 rows and found 10 numeric columns
## 06:33:56 Using Annoy for neighbor search, n_neighbors = 30
## 06:33:56 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 06:33:58 Writing NN index file to temp file /tmp/Rtmp07H6lD/file38b12d3f757b
## 06:33:58 Searching Annoy index using 1 thread, search_k = 3000
## 06:34:02 Annoy recall = 100%
## 06:34:02 Commencing smooth kNN distance calibration using 1 thread
## 06:34:03 Initializing from normalized Laplacian + noise
## 06:34:03 Commencing optimization for 200 epochs, with 506242 positive edges
## 06:34:08 Optimization finished
saveRDS(pbmc, file = "severe143.rds")

a<-"severe_145_02.csv"
a1<-data.frame(fread(a),check.names=FALSE, row.names=1)
pbmc<- CreateSeuratObject(counts = a1, project = "s2", min.cells = 3, min.features = 200)
pbmc
## An object of class Seurat 
## 17524 features across 17395 samples within 1 assay 
## Active assay: RNA (17524 features, 0 variable features)
pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")
pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
all.genes <- rownames(pbmc)
pbmc <- ScaleData(pbmc, features = all.genes)
## Centering and scaling data matrix
pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc))
## PC_ 1 
## Positive:  CTSL, FTL, CTSB, IFI30, PSAP, CTSD, CST3, CCL2, HLA-DRA, CD68 
##     CSTB, GLUL, HLA-DRB1, CD63, MAFB, GRN, CD163, LGALS3, TIMP1, MS4A4A 
##     CD74, LGMN, PLAUR, SERPINA1, HLA-DRB5, CCL8, SAT1, HLA-DPA1, HMOX1, DEFB1 
## Negative:  CCL5, PTPRCAP, IL32, CD7, CD3E, CD2, GZMA, GZMB, PRF1, CD3D 
##     NKG7, GNLY, CST7, TRBC2, SYNE2, CD247, CD8B, CD8A, LINC01871, GZMH 
##     C12orf75, GZMK, OCIAD2, CLIC3, ETS1, LCK, GNG2, TRAC, CD3G, CD96 
## PC_ 2 
## Positive:  CCL5, PTPRCAP, CD7, CD2, GZMA, CD3E, IL32, GZMB, PRF1, NKG7 
##     CD3D, GNLY, CST7, CD247, TRBC2, GZMH, LINC01871, CD8B, CD8A, GZMK 
##     GNG2, ETS1, KLRD1, CD3G, LCK, CD96, TRAC, CTSW, IL2RB, SH2D2A 
## Negative:  KRT8, CD24, CYP4B1, TSPAN1, KRT19, FXYD3, SMIM22, FAM183A, C9orf24, RSPH1 
##     C20orf85, WFDC2, GSTA1, C1orf194, KRT18, TACSTD2, CAPS, MUC4, DYNLRB2, C19orf33 
##     CHST9, AGR3, PIFO, PIGR, LRRIQ1, ALDH1A1, SLPI, MS4A8, ELF3, PERP 
## PC_ 3 
## Positive:  CYBA, LGALS1, CD74, HLA-DRB5, RPS18, RPL18A, C1QA, RPLP0, ALDOA, HCST 
##     CD68, CALR, RPS5, RPSA, HLA-DRB1, C1QC, MTRNR2L12, CAPG, RPS4X, MT-ND4L 
##     ATP6V0C, HLA-DPB1, SYNGR2, PTMS, HLA-DQA1, PRDX1, RPL10A, HLA-DMA, C1QB, CTSC 
## Negative:  S100A8, IFIT2, CCL2, IFIT1, RSAD2, CXCL10, TNFAIP6, SLPI, S100A12, IFIT3 
##     CEACAM1, CCL3L1, CXCL8, WFDC2, SCGB3A1, HCAR3, MSMB, BPIFA1, STEAP4, CH25H 
##     PROK2, SAT1, RSPH1, KCNJ15, SCGB1A1, TNFSF10, AGR2, SERPINB3, GSTA1, LYZ 
## PC_ 4 
## Positive:  IL1RN, CXCL10, TNFSF10, ISG15, CCL3L1, FCN1, IFIT1, APOBEC3A, IFIT2, LYZ 
##     PLAC8, S100A8, IFIT3, ISG20, CXCL11, IDO1, RSAD2, HPSE, S100A4, SERPINB9 
##     HES4, ZFP36, DEFB1, IL27, IFITM1, CCL3, OLR1, MT2A, ATF5, JUNB 
## Negative:  APOE, CCL18, LGMN, APOC1, GPNMB, C1QC, C1QB, CTSD, RNASE1, CTSL 
##     FABP5, FTL, SMPDL3A, C1QA, NR1H3, NRP2, ZNF638, PLIN2, CSTB, TSPAN4 
##     PLA2G7, CTSZ, LMNA, PLD3, CYB5A, PLTP, PLPP3, CD163, VAT1, ACP2 
## PC_ 5 
## Positive:  C9orf24, RSPH1, C20orf85, FAM183A, C1orf194, DYNLRB2, PIFO, LRRIQ1, C9orf116, CAPS 
##     SNTN, MORN2, DNAH12, C9orf135, C5orf49, CFAP300, FAM229B, CFAP53, MORN5, ENKUR 
##     KIF9, C11orf88, LDLRAD1, CCDC146, TPPP3, DNAH10, EFCAB1, PPIL6, CCDC170, CAPSL 
## Negative:  LYPD2, CEACAM6, CXCL17, LCN2, KRT19, TACSTD2, KRT4, GPRC5A, KRT7, CEACAM5 
##     F3, S100P, MUC1, AQP5, LMO7, GABRP, KLF5, SLC6A14, SERPINB3, S100A14 
##     STEAP4, C19orf33, MSMB, EPS8L1, MALL, RND3, KRT13, ELF3, CXCL6, MSLN
pbmc <- RunUMAP(pbmc, dims = 1:10)
## 06:36:01 UMAP embedding parameters a = 0.9922 b = 1.112
## 06:36:01 Read 13807 rows and found 10 numeric columns
## 06:36:01 Using Annoy for neighbor search, n_neighbors = 30
## 06:36:01 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 06:36:02 Writing NN index file to temp file /tmp/Rtmp07H6lD/file38b16192a22b
## 06:36:02 Searching Annoy index using 1 thread, search_k = 3000
## 06:36:06 Annoy recall = 100%
## 06:36:07 Commencing smooth kNN distance calibration using 1 thread
## 06:36:08 Initializing from normalized Laplacian + noise
## 06:36:08 Commencing optimization for 200 epochs, with 549062 positive edges
## 06:36:13 Optimization finished
saveRDS(pbmc, file = "severe145.rds")

a<-"severe_146_02.csv"
a1<-data.frame(fread(a),check.names=FALSE, row.names=1)
pbmc<- CreateSeuratObject(counts = a1, project = "s3", min.cells = 3, min.features = 200)
pbmc
## An object of class Seurat 
## 16544 features across 3398 samples within 1 assay 
## Active assay: RNA (16544 features, 0 variable features)
pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")
pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
all.genes <- rownames(pbmc)
pbmc <- ScaleData(pbmc, features = all.genes)
## Centering and scaling data matrix
pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc))
## PC_ 1 
## Positive:  ELF3, CLDN4, CAPS, TNFRSF12A, KRT19, CFAP157, GSTP1, SNHG12, CLDN3, SOX4 
##     GPX4, UBXN11, LRRC23, TUBB4B, FOXJ1, NUPR1, KRT18, PPDPF, H4C3, RRAD 
##     RPS18, PRDX5, RPLP0, PLXNB2, KRT8, GDF15, SNHG8, ZMYND10, ATP5IF1, DRC3 
## Negative:  CCL3L1, FTL, APOBEC3A, HCAR3, CXCL8, G0S2, PI3, PLAUR, CCL4L2, S100A12 
##     IL1B, TXNIP, CCL3, PROK2, C15orf48, OSM, S100A4, ZFP36, ATP2B1-AS1, CCL4 
##     CD177, ANXA2R, HMOX1, CD22, CD14, NPC2, KLF2, DUSP6, TMSB4X, SLAMF7 
## PC_ 2 
## Positive:  LGALS1, HLA-DRB1, HLA-DRA, HLA-DPA1, HLA-DRB5, CTSB, VIM, CTSL, CD74, HLA-DPB1 
##     PPIA, CAPG, CST3, RPS24, LILRB4, HAVCR2, CD81, EEF1A1, ANXA5, LAIR1 
##     ATP1B3, RPL10, CTSC, PTMA, CD68, ACP5, HCST, ADA2, CTSZ, PRDX1 
## Negative:  CFAP157, ELF3, CAPS, CLDN4, SLPI, CLDN3, DRC3, ZMYND10, FOXJ1, KRT19 
##     FAM183A, RSPH1, LRRIQ1, CCDC40, MUC4, C9orf24, LRRC46, RRAD, CCDC114, SOX4 
##     C20orf85, DNAH11, SPAG17, HYDIN, VWA3A, TNFRSF12A, C1orf194, LRRC23, WDR60, KRT8 
## PC_ 3 
## Positive:  PTPRCAP, CD2, CD3E, CD3D, PRF1, TRBC2, CD247, LCK, TRAC, CD96 
##     CCL5, GZMA, CD3G, CD7, GZMB, GIMAP7, CD8A, LAG3, ZAP70, SPOCK2 
##     NKG7, FYN, CHST12, GZMM, GPRIN3, IKZF3, CTSW, EVL, IL2RB, CD6 
## Negative:  CTSL, FTL, CST3, CTSB, LILRB4, CD68, CTSZ, HLA-DRA, CD163, LGMN 
##     PLA2G7, DAB2, PLD3, SDS, FNIP2, KYNU, IL4I1, C15orf48, GPNMB, MMP19 
##     CREG1, MSR1, LAIR1, ATOX1, MAFB, C1QA, RGL1, LHFPL2, GRN, CPM 
## PC_ 4 
## Positive:  PIK3R3, PLCG2, DNAH11, HEXIM1, AHI1, PIEZO1, WDR6, MUC16, ID2, SPG11 
##     MACC1, KCNQ1OT1, EFHC1, HSPA2, H3C1, H4C2, CDKN2B, TAF4B, CDRT1, PLK2 
##     SPAG17, ATP2C2, H2AC17, BEST4, MAFB, H4-16, MUC4, DLEC1, FBXW10, DOC2A 
## Negative:  C20orf85, PRDX5, C9orf116, IGFBP2, TUBA4B, C1orf194, C9orf24, CAPS, GSTP1, C5orf49 
##     NUPR1, FAM183A, ATP5IF1, RNF187, KRT8, KRT18, IGFBP7, CATSPERD, MORN2, CLDN3 
##     DMKN, DNALI1, WDR38, ROPN1L, RRAD, PIFO, TPPP3, PPIL6, DYNLRB2, PLPP2 
## PC_ 5 
## Positive:  DNAH3, CFAP47, DLEC1, EFHC1, DNAH12, TTLL10, VWA3A, DNAH6, RP1, LINC02832 
##     HYDIN, DTHD1, CDHR3, LRRC74B, CFAP65, CCDC17, DNAH9, DNAI1, CFAP100, LRRIQ1 
##     DOC2A, CDHR4, CFAP54, CCDC40, SPAG17, CFAP46, SAXO2, CFAP43, CCDC30, CROCC2 
## Negative:  IGFBP3, LMO7, LAMB3, MACC1, KRT7, LRRC8A, EPS8L1, TSHZ2, TACSTD2, PRSS23 
##     MAL2, ADIRF, ASS1, TM4SF1, ZNF431, GALNT5, SCGB1A1, S100A2, FGFR1, SPINK5 
##     MISP, H2AC11, SOX4, DST, RND3, LMTK3, MUC5AC, CDKN2B, EPS8L2, FSTL3
pbmc <- RunUMAP(pbmc, dims = 1:10)
## 06:37:48 UMAP embedding parameters a = 0.9922 b = 1.112
## 06:37:48 Read 2256 rows and found 10 numeric columns
## 06:37:48 Using Annoy for neighbor search, n_neighbors = 30
## 06:37:48 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 06:37:49 Writing NN index file to temp file /tmp/Rtmp07H6lD/file38b13298d258
## 06:37:49 Searching Annoy index using 1 thread, search_k = 3000
## 06:37:49 Annoy recall = 100%
## 06:37:50 Commencing smooth kNN distance calibration using 1 thread
## 06:37:50 Initializing from normalized Laplacian + noise
## 06:37:50 Commencing optimization for 500 epochs, with 92048 positive edges
## 06:37:53 Optimization finished
saveRDS(pbmc, file = "severe146.rds")

a<-"severe_148_02.csv"
a1<-data.frame(fread(a),check.names=FALSE, row.names=1)
pbmc<- CreateSeuratObject(counts = a1, project = "s4", min.cells = 3, min.features = 200)
pbmc
## An object of class Seurat 
## 17460 features across 2398 samples within 1 assay 
## Active assay: RNA (17460 features, 0 variable features)
pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")
pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
all.genes <- rownames(pbmc)
pbmc <- ScaleData(pbmc, features = all.genes)
## Centering and scaling data matrix
pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc))
## PC_ 1 
## Positive:  CD3E, IL32, CD7, CD3D, PTPRCAP, CD2, ETS1, TRBC2, CD247, CLEC2D 
##     OCIAD2, LCK, SH2D2A, RORA, SEPTIN1, LDHB, CCL5, IL7R, SPOCK2, SYNE2 
##     ZAP70, RPL5, RPSA, PRF1, CD6, CD96, PDCD4, TRAC, NKG7, GZMM 
## Negative:  FTL, IFI30, LYZ, CCL2, IL1RN, CTSL, APOBEC3A, CTSB, CD68, S100A9 
##     MAFB, TIMP1, FCN1, S100A8, CD300E, SGK1, CTSD, C15orf48, GLUL, PLAUR 
##     EMP1, HLA-DRA, CD14, CCL8, CD63, CYBB, MS4A6A, CD163, TGFBI, DUSP6 
## PC_ 2 
## Positive:  FCGR3B, IFIT2, IFIT3, S100A8, TNFAIP6, G0S2, IFIT1, CMTM2, S100A9, CXCR2 
##     PDE4B, SELL, HCAR3, TNFSF10, PROK2, RSAD2, ALOX5AP, ADGRG3, HCAR2, FFAR2 
##     ZBP1, MNDA, PI3, GBP5, PLEK, CXCL8, ALPL, PHACTR1, IL1RN, IL1B 
## Negative:  LGALS3, S100A10, EEF1A1, RPS8, GSTP1, CSTB, RPL13, PRDX1, RPS12, LMNA 
##     PDLIM1, CTSB, CD74, VIM, CD9, CALR, CLDN7, ANXA1, CD63, RPS27 
##     PPDPF, TUBB4B, UBC, RPLP0, HSP90AA1, RPS18, RHOB, RPS5, RPS15A, SSR4 
## PC_ 3 
## Positive:  ELF3, KRT8, CLDN7, AKR1C2, PDLIM1, KIAA1522, SMIM22, KRT19, MUC4, CLDN4 
##     CAPS, SLPI, CD24, RSPH1, GPX2, KRT7, SOX4, CLDN3, MAL2, C19orf33 
##     PPL, PPIL6, TSPAN1, KRT18, KLF5, TACSTD2, WFDC2, FXYD3, GSTA1, CXCL17 
## Negative:  VIM, EEF1A1, RPS27, RPS12, RPS3, RPL23A, RPS29, CALR, PABPC1, RPS4X 
##     RPS27A, RPS15A, RPL13, RPS6, RPL3, RPS8, CD3E, CD7, EMP3, RPSA 
##     HSPA8, RPS5, IL32, RPL5, CD3D, PTPRCAP, RPS18, RPS3A, RGS1, CD74 
## PC_ 4 
## Positive:  APOE, C1QC, APOC1, C1QA, C1QB, GPNMB, CCL18, LGMN, NR1H3, TREM2 
##     FABP5, CD276, HLA-DQA1, VAT1, SCD, GCHFR, ACP2, CD9, PLTP, A2M 
##     PMP22, CREG1, FTL, CD81, MRAS, HLA-DPB1, APOC2, TUBB, HLA-DPA1, SPP1 
## Negative:  S100A12, FCN1, VCAN, APOBEC3A, ZFP36, CKAP4, CD300E, HPSE, S100A9, S100A8 
##     LYZ, TNFSF10, FOS, LILRA5, WARS1, RIN2, NEXN, MAFB, RSAD2, NCF1 
##     DUSP6, CFP, EMP1, GBP1, IFIT2, GCH1, FOSB, IL1RN, NLRP3, CCL7 
## PC_ 5 
## Positive:  ADIRF, KLK11, DSG2, MET, EFNA1, C19orf33, S100A2, ASS1, SULT2B1, MSLN 
##     FSTL1, CEACAM5, VSIG2, LY6D, S100A14, MYO5B, HSPB8, CAV2, TM4SF1, KRTCAP3 
##     FGFBP1, MAL2, S100A16, AGR2, EPS8L1, BCAR1, FST, ITPKC, GPRC5A, KRT15 
## Negative:  PPIL6, CFAP157, PACRG, GDF15, CFAP53, H2BC18, GULP1, CCDC78, RSPH1, PIFO 
##     MAP1B, GIHCG, SPEF1, IQCK, CSKMT, C1orf194, CFAP45, MOV10L1, SH3BGR, C9orf24 
##     CAPS, SLC25A4, AARS1, COL1A1, ALDH3A1, DNAH11, H2BC9, SOX2, PABPC1L, GTF2IRD1
pbmc <- RunUMAP(pbmc, dims = 1:10)
## 06:38:15 UMAP embedding parameters a = 0.9922 b = 1.112
## 06:38:15 Read 697 rows and found 10 numeric columns
## 06:38:15 Using Annoy for neighbor search, n_neighbors = 30
## 06:38:15 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 06:38:15 Writing NN index file to temp file /tmp/Rtmp07H6lD/file38b16594c19b
## 06:38:15 Searching Annoy index using 1 thread, search_k = 3000
## 06:38:15 Annoy recall = 100%
## 06:38:15 Commencing smooth kNN distance calibration using 1 thread
## 06:38:16 Initializing from normalized Laplacian + noise
## 06:38:16 Commencing optimization for 500 epochs, with 26150 positive edges
## 06:38:18 Optimization finished
saveRDS(pbmc, file = "severe148.rds")

a<-"severe_149_02.csv"
a1<-data.frame(fread(a),check.names=FALSE, row.names=1)
pbmc<- CreateSeuratObject(counts = a1, project = "s5", min.cells = 3, min.features = 200)
pbmc
## An object of class Seurat 
## 16724 features across 2483 samples within 1 assay 
## Active assay: RNA (16724 features, 0 variable features)
pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")
pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
all.genes <- rownames(pbmc)
pbmc <- ScaleData(pbmc, features = all.genes)
## Centering and scaling data matrix
pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc))
## PC_ 1 
## Positive:  CST3, CTSL, IFI30, FTL, TYROBP, CTSD, FCER1G, CD68, CTSB, CCL2 
##     LGALS1, CD63, MS4A4A, CD163, CCL8, PSAP, FTH1, GLUL, MAFB, TIMP1 
##     GRN, NPC2, TYMP, AIF1, LGALS3, CREG1, FCGRT, HLA-DRA, MS4A6A, MARCKS 
## Negative:  IL32, CD3E, PTPRCAP, EVL, SPOCK2, IL7R, CD2, CD3D, LIMD2, CD247 
##     GPR171, RAC2, OCIAD2, LCK, BTG1, LDHB, ETS1, ZFP36L2, CCL5, CD7 
##     SYNE2, CD69, LTB, TRBC2, GIMAP7, PRF1, CLEC2D, ODF2L, PARP8, PIM2 
## PC_ 2 
## Positive:  SRGN, CD69, CD3E, RAC2, PTPRCAP, IL7R, ALOX5AP, CD52, LIMD2, IL32 
##     CD2, GIMAP7, EVL, SPOCK2, CCL5, CD3D, CCL4, LSP1, FTL, BTG1 
##     EMP3, CD247, LCK, GPR171, TYROBP, PRF1, CYTIP, LYAR, NKG7, TRBC2 
## Negative:  TACSTD2, ELF3, CLDN7, KRT7, MUC4, C19orf33, CXCL17, WFDC2, GPRC5A, CEACAM6 
##     CLDN4, MAL2, LMO7, KRT19, ALDH1A3, ADGRF1, FXYD3, S100A14, PLAAT2, SLC6A14 
##     SDCBP2, F3, C1orf116, MUC1, MUC16, ASS1, TM4SF1, CD24, SDC1, AGR2 
## PC_ 3 
## Positive:  FCGR3B, CSF3R, S100A8, SLC25A37, FPR1, CLEC4E, S100A9, BCL2A1, SELL, MNDA 
##     IFIT2, IL1RN, TNFAIP6, HES4, NCF1, HCAR3, HCAR2, CEACAM1, ISG15, SAT1 
##     TNFSF10, CLEC4A, P2RY13, FTH1, MT2A, CMTM2, SOD2, IGSF6, PHACTR1, TLR2 
## Negative:  MTRNR2L12, JUN, CCL5, NKG7, CALR, ARL4C, PRF1, CD3E, LAG3, IL32 
##     S100A10, ANXA1, CD2, ZFP36L2, CALM1, CTSC, CD3D, BTG1, CD7, PTPRCAP 
##     GZMB, GZMA, CD52, CD8A, EVL, PLAAT4, LCK, CLIC3, HLA-DPA1, CD8B 
## PC_ 4 
## Positive:  SLC6A14, MALL, S100A14, CEACAM6, SPINT1, CEACAM5, SCEL, EHF, PRSS8, KRT15 
##     SULT2B1, GPRC5A, SDC1, PLS3, S100A16, ASS1, PODXL, MPZL2, NEAT1, MUC21 
##     KLK10, MYH14, VSIG2, GRHL3, SDCBP2, SPNS2, PPL, LRRC8A, LRATD1, AQP5 
## Negative:  MORN2, C1orf194, SNTN, DNALI1, CAPS, CETN2, FAM81B, TMEM190, C20orf85, RSPH1 
##     PIFO, C9orf24, CFAP126, PPIL6, CLDN3, DPCD, TSPAN19, TUBA4B, TPPP3, C9orf116 
##     PFN2, IFT22, FAM183A, C5orf49, EFCAB1, SAA1, WDR38, ANKRD44-AS1, C9orf135, AKAP14 
## PC_ 5 
## Positive:  APOE, SLC16A10, AREG, TSC22D1, VEGFA, H2AC6, G0S2, SDC2, APOC1, CREM 
##     ATP13A3, CD9, GPR183, ZNF331, SDS, ELL2, SOD2, TMEM38B, RGS1, PMP22 
##     HLA-DQA1, CHMP1B, CD83, AVPI1, RAB7B, BTG1, SQSTM1, TREM2, IL1R1, CKS2 
## Negative:  ACTB, FCN1, C1orf162, LILRA5, MS4A6A, ACTG1, SSB, S100A8, CXCL10, S100A4 
##     S100A9, BLVRA, LYZ, NKG7, TYMP, CBR1, PLBD1, CMKLR1, APOBEC3A, PRF1 
##     CCL5, PLAAT4, TNFSF10, GZMA, CD8A, MPEG1, HSPB1, ASGR1, GZMB, CD38
pbmc <- RunUMAP(pbmc, dims = 1:10)
## 06:38:31 UMAP embedding parameters a = 0.9922 b = 1.112
## 06:38:31 Read 1702 rows and found 10 numeric columns
## 06:38:31 Using Annoy for neighbor search, n_neighbors = 30
## 06:38:31 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 06:38:32 Writing NN index file to temp file /tmp/Rtmp07H6lD/file38b11b818c2c
## 06:38:32 Searching Annoy index using 1 thread, search_k = 3000
## 06:38:32 Annoy recall = 100%
## 06:38:33 Commencing smooth kNN distance calibration using 1 thread
## 06:38:34 Initializing from normalized Laplacian + noise
## 06:38:34 Commencing optimization for 500 epochs, with 64604 positive edges
## 06:38:36 Optimization finished
saveRDS(pbmc, file = "severe149.rds")

a<-"severe_152_02.csv"
a1<-data.frame(fread(a),check.names=FALSE, row.names=1)
pbmc<- CreateSeuratObject(counts = a1, project = "s6", min.cells = 3, min.features = 200)
pbmc
## An object of class Seurat 
## 17352 features across 6960 samples within 1 assay 
## Active assay: RNA (17352 features, 0 variable features)
pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")
pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
all.genes <- rownames(pbmc)
pbmc <- ScaleData(pbmc, features = all.genes)
## Centering and scaling data matrix
pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc))
## PC_ 1 
## Positive:  CST3, C15orf48, MAFB, TIMP1, GLUL, IFI30, BCL2A1, S100A9, CD163, SOD2 
##     PLAUR, EMP1, S100A8, LYZ, SERPINA1, FCGR2A, LGALS3, CCL7, PLEK, CTSZ 
##     HLA-DRA, IGSF6, CYP1B1, MARCKS, FCGR1A, PLIN2, MMP19, LAIR1, C5AR1, FNIP2 
## Negative:  IL32, CD27, CCL5, GZMA, CD3E, PTPRCAP, GZMB, LIMD2, CD3D, CD7 
##     GNLY, CD2, CD8A, PRF1, TRBC2, CD8B, NKG7, LDHB, HMGB1, SEPTIN1 
##     LINC01871, LCK, C12orf75, CLEC2D, EVL, TRAC, LIME1, CD3G, ETS1, GZMH 
## PC_ 2 
## Positive:  NKG7, CCL5, GZMA, GZMB, CD3E, CD7, CD3D, CD8A, PRF1, CD2 
##     CD8B, GNLY, LINC01871, PLAAT4, TRBC2, IL32, CST7, EVL, LCK, CD3G 
##     GZMH, LAG3, CLIC3, GZMK, CBLB, ARL4C, CD96, LBH, TRAC, LIMD2 
## Negative:  JCHAIN, MZB1, DERL3, HSP90B1, IGHG1, IGHG4, IGLC3, IGHV4-34, SEC11C, IGLV3-19 
##     POU2AF1, FKBP11, IGHA1, TXNDC5, MYDGF, CPNE5, PRDX4, CD79A, IGLL5, TNFRSF17 
##     LMAN1, SSR4, SDF2L1, HSPA5, ELL2, SDC1, TENT5C, CCR10, SYVN1, ITM2C 
## PC_ 3 
## Positive:  CD74, PPIB, HSP90B1, MANF, MYDGF, IGLC3, MZB1, IGHG4, HSPA5, PDIA4 
##     FKBP11, TMED9, JCHAIN, SEC11C, IGHG1, DERL3, S100A8, LMAN1, RPN2, POU2AF1 
##     IFI30, CCL7, ERLEC1, CD38, HSPD1, SRM, DERL1, BCL2A1, SMC4, CD163 
## Negative:  KRT19, KRT7, KLF5, C19orf33, KRT8, ELF3, SFN, TSPAN1, KRT17, CD24 
##     CAPS, RND3, GRAMD2B, ZMYND10, FAM183A, KRT18, CFAP77, SMIM22, RSPH4A, LRRIQ1 
##     CCDC190, SAXO2, CLDN3, AGR2, C9orf135, C9orf116, FXYD3, DNALI1, ADH7, C1orf194 
## PC_ 4 
## Positive:  SIT1, LIME1, IKZF3, MIAT, ARHGAP9, CPNE7, LTB, LINC01871, CD27, CST7 
##     PDE4D, SPOCK2, PARP8, LBH, APOBEC3G, PBXIP1, GSTM2, TIGIT, RGL4, PYHIN1 
##     CCND2, FKBP11, PIM2, MAGED1, E2F5, CXCR6, TC2N, SNAI3, SDC1, LINC00861 
## Negative:  TOP2A, CENPF, ASPM, KIFC1, CDC20, DLGAP5, UBE2C, MKI67, MXD3, TPX2 
##     CCNB1, GTSE1, CDCA8, HJURP, CCNA2, KIF23, PLK1, NUSAP1, CDKN3, NUF2 
##     BIRC5, SGO1, CDCA2, DEPDC1B, CENPE, H2AX, SGO2, CDK1, KIF2C, KIF22 
## PC_ 5 
## Positive:  SERPINB2, CCL7, CXCL3, S100A8, TMEM158, S100A12, CD93, MMP19, S100A9, CXCL2 
##     IL1B, DUSP6, CXCL8, PID1, CCL20, HPSE, EREG, ASPH, SPP1, CRADD 
##     ANPEP, THBD, FFAR2, CXCL5, CCL3L1, TCTEX1D1, RGCC, CXCL1, UBE2C, BCL2A1 
## Negative:  C1QA, C1QC, C1QB, HLA-DQA1, CD74, HLA-DQB1, HLA-DPB1, SDS, VMO1, SYNGR2 
##     CD86, LGMN, ACP5, HLA-DPA1, SLAMF7, AXL, GPR183, NCF1, NR1H3, APOE 
##     FPR3, HLA-DQA2, MS4A4A, HLA-DMB, TMEM176B, TREM2, TMEM176A, HLA-DRB5, CLEC10A, SPINT2
pbmc <- RunUMAP(pbmc, dims = 1:10)
## 06:39:03 UMAP embedding parameters a = 0.9922 b = 1.112
## 06:39:03 Read 662 rows and found 10 numeric columns
## 06:39:03 Using Annoy for neighbor search, n_neighbors = 30
## 06:39:03 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 06:39:03 Writing NN index file to temp file /tmp/Rtmp07H6lD/file38b15b12321d
## 06:39:03 Searching Annoy index using 1 thread, search_k = 3000
## 06:39:03 Annoy recall = 100%
## 06:39:04 Commencing smooth kNN distance calibration using 1 thread
## 06:39:05 Initializing from normalized Laplacian + noise
## 06:39:05 Commencing optimization for 500 epochs, with 23720 positive edges
## 06:39:06 Optimization finished
saveRDS(pbmc, file = "severe152.rds")
#input readRDS
hc51<-readRDS(file="hc51.rds")
hc52<-readRDS(file="hc52.rds")
hc100<-readRDS(file="hc100.rds")
mild141<-readRDS(file="mild141.rds")
mild142<-readRDS(file="mild142.rds")
mild144<-readRDS(file="mild144.rds")
severe143<-readRDS(file="severe143.rds")
severe145<-readRDS(file="severe145.rds")
severe146<-readRDS(file="severe146.rds")
severe148<-readRDS(file="severe148.rds")
severe149<-readRDS(file="severe149.rds")
severe152<-readRDS(file="severe152.rds")
#setup tech and celltype
hc51<-RenameCells(hc51,add.cell.id="hc51",for.merge=T)
hc51@meta.data$tech<-"healthy_control"
hc51@meta.data$celltype<-"healthy_control_51"

hc52<-RenameCells(hc52,add.cell.id="hc52",for.merge=T)
hc52@meta.data$tech<-"healthy_control"
hc52@meta.data$celltype<-"healthy_control_52"

hc100<-RenameCells(hc100,add.cell.id="hc100",for.merge=T)
hc100@meta.data$tech<-"healthy_control"
hc100@meta.data$celltype<-"healthy_control_100"

mild141<-RenameCells(mild141,add.cell.id="mild141",for.merge=T)
mild141@meta.data$tech<-"mild"
mild141@meta.data$celltype<-"mild_141"

mild142<-RenameCells(mild142,add.cell.id="mild142",for.merge=T)
mild142@meta.data$tech<-"mild"
mild142@meta.data$celltype<-"mild_142"

mild144<-RenameCells(mild144,add.cell.id="mild144",for.merge=T)
mild144@meta.data$tech<-"mild"
mild144@meta.data$celltype<-"mild_144"

severe143<-RenameCells(severe143,add.cell.id="severe143",for.merge=T)
severe143@meta.data$tech<-"severe"
severe143@meta.data$celltype<-"severe_143"

severe145<-RenameCells(severe145,add.cell.id="severe145",for.merge=T)
severe145@meta.data$tech<-"severe"
severe145@meta.data$celltype<-"severe_145"

severe146<-RenameCells(severe146,add.cell.id="severe146",for.merge=T)
severe146@meta.data$tech<-"severe"
severe146@meta.data$celltype<-"severe_146"

severe148<-RenameCells(severe148,add.cell.id="severe148",for.merge=T)
severe148@meta.data$tech<-"severe"
severe148@meta.data$celltype<-"severe_148"

severe149<-RenameCells(severe149,add.cell.id="severe149",for.merge=T)
severe149@meta.data$tech<-"severe"
severe149@meta.data$celltype<-"severe_149"

severe152<-RenameCells(severe152,add.cell.id="severe152",for.merge=T)
severe152@meta.data$tech<-"severe"
severe152@meta.data$celltype<-"severe_152" 
#merge
h1_2<-merge(hc51,hc52)
h123<-merge(h1_2,hc100)
m1_2<-merge(mild141,mild142)
m123<-merge(m1_2,mild144)
s1_2<-merge(severe143,severe145)
s123<-merge(s1_2,severe146)
s4_5<-merge(severe148,severe149)
s456<-merge(s4_5,severe152)
s123456<-merge(s123,s456)
hm<-merge(h123,m123)
hms<-merge(hm,s123456)
saveRDS(hms, file="hms_before_integrate.rds")
#before integrate
hms[["percent.mt"]] <- PercentageFeatureSet(hms, pattern = "^Mt-")
VlnPlot(hms, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3,pt.size = 0)
## Warning in SingleExIPlot(type = type, data = data[, x, drop = FALSE], idents =
## idents, : All cells have the same value of percent.mt.

pancreas <- NormalizeData(object = hms, normalization.method = "LogNormalize", scale.factor = 1e4)
pancreas <- FindVariableFeatures(pancreas, selection.method = "vst", nfeatures = 2000, verbose = FALSE)
pancreas <- ScaleData(pancreas, verbose = FALSE)
pancreas <- RunPCA(pancreas, npcs = 30, verbose = FALSE)
pancreas <- RunUMAP(pancreas, reduction = "pca", dims = 1:30)
## 06:43:32 UMAP embedding parameters a = 0.9922 b = 1.112
## 06:43:32 Read 52158 rows and found 30 numeric columns
## 06:43:32 Using Annoy for neighbor search, n_neighbors = 30
## 06:43:32 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 06:43:40 Writing NN index file to temp file /tmp/Rtmp07H6lD/file38b15d5d3735
## 06:43:40 Searching Annoy index using 1 thread, search_k = 3000
## 06:43:57 Annoy recall = 100%
## 06:43:57 Commencing smooth kNN distance calibration using 1 thread
## 06:43:59 Initializing from normalized Laplacian + noise
## 06:44:02 Commencing optimization for 200 epochs, with 2357338 positive edges
## 06:44:22 Optimization finished
p1 <- DimPlot(pancreas, reduction = "umap", group.by = "tech")
p2 <- DimPlot(pancreas, reduction = "umap", group.by = "celltype", label = TRUE, repel = TRUE) + 
  NoLegend()
plot_grid(p1,p2)

#integrate
pancreas.list <- SplitObject(pancreas, split.by = "celltype")
for (i in 1: length(pancreas.list)) {
  pancreas.list[[i]] <- NormalizeData(pancreas.list[[i]], verbose = FALSE)
  pancreas.list[[i]] <- FindVariableFeatures(pancreas.list[[i]], selection.method = "vst", nfeatures = 2000, 
                                             verbose = FALSE)
}
reference.list <- pancreas.list[c("healthy_control_51","healthy_control_52","healthy_control_100","mild_141",
                                  "mild_142","mild_144", "severe_143", "severe_145", "severe_146", "severe_148",
                                  "severe_149", "severe_152")]
pancreas.anchors <- FindIntegrationAnchors(object.list = reference.list, dims = 1:30)
## Computing 2000 integration features
## Scaling features for provided objects
## Finding all pairwise anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 22355 anchors
## Filtering anchors
##  Retained 4674 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 1464 anchors
## Filtering anchors
##  Retained 1171 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 1406 anchors
## Filtering anchors
##  Retained 1078 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 5743 anchors
## Filtering anchors
##  Retained 1976 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 6214 anchors
## Filtering anchors
##  Retained 1132 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 1330 anchors
## Filtering anchors
##  Retained 844 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 6653 anchors
## Filtering anchors
##  Retained 2100 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 7667 anchors
## Filtering anchors
##  Retained 1464 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 1403 anchors
## Filtering anchors
##  Retained 843 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 4174 anchors
## Filtering anchors
##  Retained 3295 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 1749 anchors
## Filtering anchors
##  Retained 1412 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 1683 anchors
## Filtering anchors
##  Retained 1159 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 1089 anchors
## Filtering anchors
##  Retained 925 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 1436 anchors
## Filtering anchors
##  Retained 1396 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 1518 anchors
## Filtering anchors
##  Retained 1478 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 26470 anchors
## Filtering anchors
##  Retained 1322 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 25133 anchors
## Filtering anchors
##  Retained 699 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 1499 anchors
## Filtering anchors
##  Retained 329 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 5535 anchors
## Filtering anchors
##  Retained 1391 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 6575 anchors
## Filtering anchors
##  Retained 1665 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 1655 anchors
## Filtering anchors
##  Retained 496 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 29135 anchors
## Filtering anchors
##  Retained 1596 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 28477 anchors
## Filtering anchors
##  Retained 1149 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 1488 anchors
## Filtering anchors
##  Retained 343 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 5554 anchors
## Filtering anchors
##  Retained 1312 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 6527 anchors
## Filtering anchors
##  Retained 1667 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 1634 anchors
## Filtering anchors
##  Retained 520 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 31795 anchors
## Filtering anchors
##  Retained 7563 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 9221 anchors
## Filtering anchors
##  Retained 1203 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 9137 anchors
## Filtering anchors
##  Retained 1052 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 1381 anchors
## Filtering anchors
##  Retained 526 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 4569 anchors
## Filtering anchors
##  Retained 1404 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 5203 anchors
## Filtering anchors
##  Retained 1457 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 1551 anchors
## Filtering anchors
##  Retained 700 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 9057 anchors
## Filtering anchors
##  Retained 4508 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 9376 anchors
## Filtering anchors
##  Retained 2695 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 3196 anchors
## Filtering anchors
##  Retained 1671 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 3087 anchors
## Filtering anchors
##  Retained 1340 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 1367 anchors
## Filtering anchors
##  Retained 889 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 2410 anchors
## Filtering anchors
##  Retained 1752 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 2549 anchors
## Filtering anchors
##  Retained 1798 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 1316 anchors
## Filtering anchors
##  Retained 953 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 3314 anchors
## Filtering anchors
##  Retained 2834 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 3282 anchors
## Filtering anchors
##  Retained 2242 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 2697 anchors
## Filtering anchors
##  Retained 2337 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 6494 anchors
## Filtering anchors
##  Retained 2055 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 7023 anchors
## Filtering anchors
##  Retained 1591 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 1477 anchors
## Filtering anchors
##  Retained 802 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 3698 anchors
## Filtering anchors
##  Retained 2245 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 4414 anchors
## Filtering anchors
##  Retained 2686 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 1610 anchors
## Filtering anchors
##  Retained 949 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 6726 anchors
## Filtering anchors
##  Retained 4706 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 6381 anchors
## Filtering anchors
##  Retained 3811 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 4553 anchors
## Filtering anchors
##  Retained 2699 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 2760 anchors
## Filtering anchors
##  Retained 1993 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 2794 anchors
## Filtering anchors
##  Retained 1741 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 2930 anchors
## Filtering anchors
##  Retained 1378 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 1358 anchors
## Filtering anchors
##  Retained 1001 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 2380 anchors
## Filtering anchors
##  Retained 1729 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 2597 anchors
## Filtering anchors
##  Retained 1922 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 1509 anchors
## Filtering anchors
##  Retained 1105 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 2759 anchors
## Filtering anchors
##  Retained 2216 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 2754 anchors
## Filtering anchors
##  Retained 2088 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 2527 anchors
## Filtering anchors
##  Retained 1782 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 1999 anchors
## Filtering anchors
##  Retained 1555 anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 2664 anchors
## Filtering anchors
##  Retained 2153 anchors
pancreas.integrated <- IntegrateData(anchorset = pancreas.anchors, dims = 1:30)
## Merging dataset 10 into 7
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## Merging dataset 6 into 5
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## Merging dataset 3 into 1
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## Merging dataset 12 into 11
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## Merging dataset 4 into 5 6
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## Merging dataset 11 12 into 7 10
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## Merging dataset 9 into 7 10 11 12
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## Merging dataset 5 6 4 into 1 3
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## Merging dataset 2 into 1 3 5 6 4
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## Merging dataset 8 into 7 10 11 12 9
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## Merging dataset 1 3 5 6 4 2 into 7 10 11 12 9 8
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## Warning: Adding a command log without an assay associated with it
DefaultAssay(pancreas.integrated) <- "integrated"
pancreas.integrated <- ScaleData(pancreas.integrated, verbose = FALSE)
pancreas.integrated <- RunPCA(pancreas.integrated, npcs = 30, verbose = FALSE)
pancreas.integrated <- RunUMAP(pancreas.integrated, reduction = "pca", dims = 1:30)
## 07:55:08 UMAP embedding parameters a = 0.9922 b = 1.112
## 07:55:08 Read 52158 rows and found 30 numeric columns
## 07:55:08 Using Annoy for neighbor search, n_neighbors = 30
## 07:55:09 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 07:57:03 Writing NN index file to temp file /tmp/Rtmp07H6lD/file38b14c673012
## 07:57:03 Searching Annoy index using 1 thread, search_k = 3000
## 07:57:22 Annoy recall = 100%
## 07:57:22 Commencing smooth kNN distance calibration using 1 thread
## 07:57:24 Initializing from normalized Laplacian + noise
## 07:57:38 Commencing optimization for 200 epochs, with 2462718 positive edges
## 07:57:59 Optimization finished
p1 <- DimPlot(pancreas.integrated, reduction = "umap", group.by = "tech")
p2 <- DimPlot(pancreas.integrated, reduction = "umap", group.by = "celltype")
plot_grid(p1,p2)

saveRDS(pancreas.integrated, file = "hms_individual_integrated_OK.rds")


#draw the plot group.by celltype
hms_individual_integrated<-readRDS(file="hms_individual_integrated_OK.rds")
p1 <- DimPlot(hms_individual_integrated, reduction = "umap", group.by = "celltype")
p1

#find how many 15cluster
ElbowPlot(hms_individual_integrated)

hms_neighbor<- FindNeighbors(hms_individual_integrated, dims = 1:10)
## Computing nearest neighbor graph
## Computing SNN
hms_cluster <- FindClusters( hms_neighbor, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 52158
## Number of edges: 1604563
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8838
## Number of communities: 15
## Elapsed time: 18 seconds
head(Idents(hms_cluster), 5)
## hc51_AAACCTGAGACACTAA-1 hc51_AAACCTGAGGAGTACC-1 hc51_AAACCTGAGGATATAC-1 
##                       1                       9                       1 
## hc51_AAACCTGAGGTCATCT-1 hc51_AAACCTGCACGGATAG-1 
##                       1                       3 
## Levels: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
hms_cluster<- RunUMAP(hms_cluster, dims = 1:10)
## 08:00:27 UMAP embedding parameters a = 0.9922 b = 1.112
## 08:00:27 Read 52158 rows and found 10 numeric columns
## 08:00:27 Using Annoy for neighbor search, n_neighbors = 30
## 08:00:27 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 08:00:34 Writing NN index file to temp file /tmp/Rtmp07H6lD/file38b1c654706
## 08:00:34 Searching Annoy index using 1 thread, search_k = 3000
## 08:00:52 Annoy recall = 100%
## 08:00:53 Commencing smooth kNN distance calibration using 1 thread
## 08:00:55 Initializing from normalized Laplacian + noise
## 08:00:57 Commencing optimization for 200 epochs, with 2146824 positive edges
## 08:01:17 Optimization finished
DimPlot(hms_cluster, reduction = "umap")

saveRDS(hms_cluster, file = "hms_cluster_test.rds")
#name each cluster id
new.cluster.ids <- c("Macrophage", "Macrophage", "Macrophage", "Macrophage", "Neutrophils", "Macrophage", "Naive_CD4_T", "NK", "Neutrophils", "Dendritic", "T", "T","Basal","Plasma","T") 
names(new.cluster.ids) <- levels(hms_cluster)
hms_cluster_id<- RenameIdents(hms_cluster, new.cluster.ids)
DimPlot(hms_cluster_id, reduction = "umap", label = TRUE, pt.size = 0.5) + NoLegend()

saveRDS(hms_cluster_id, file = "hms_cluster_id_test.rds")

#hms_cluster_id<-readRDS(file="hms_cluster_id_test.rds")
Macrophage<-subset(hms_cluster_id, idents=c('Macrophage'))
DimPlot(Macrophage, reduction = "umap")

saveRDS(Macrophage, file="Macrophage.rds")
Neutrophils<-subset(hms_cluster_id, idents=c('Neutrophils'))
DimPlot(Neutrophils, reduction = "umap")

saveRDS(Neutrophils, file="Neutrophils.rds")
Naive_CD4_T<-subset(hms_cluster_id, idents=c('Naive_CD4_T'))
DimPlot(Naive_CD4_T, reduction = "umap")

saveRDS(Naive_CD4_T, file="Naive_CD4_T.rds")
NK<-subset(hms_cluster_id, idents=c('NK'))
DimPlot(NK, reduction = "umap")

saveRDS(NK, file="NK.rds")
Dendritic<-subset(hms_cluster_id, idents=c('Dendritic'))
DimPlot(Dendritic, reduction = "umap")

saveRDS(Dendritic, file="Dendritic.rds")
Basal<-subset(hms_cluster_id, idents=c('Basal'))
DimPlot(Basal, reduction = "umap")

saveRDS(Basal, file="Basal.rds")
T<-subset(hms_cluster_id, idents=c('T'))
DimPlot(T, reduction = "umap")

saveRDS(T, file="T.rds")
Plasma<-subset(hms_cluster_id, idents=c('Plasma'))
DimPlot(Plasma, reduction = "umap")

saveRDS(Plasma, file="Plasma.rds")

#input each cluster
Basal<-readRDS("Basal.rds")
Dendritic<-readRDS("Dendritic.rds")
Macrophage<-readRDS("Macrophage.rds")
Naive_CD4_T<-readRDS("Naive_CD4_T.rds")
Neutrophils<-readRDS("Neutrophils.rds")
NK<-readRDS("NK.rds")
Plasma<-readRDS("Plasma.rds")
T<-readRDS("T.rds")

DimPlot(Basal, reduction = "umap", split.by = "tech")

DimPlot(Dendritic, reduction = "umap", split.by = "tech")

DimPlot(Macrophage, reduction = "umap", split.by = "tech")

DimPlot(Naive_CD4_T, reduction = "umap", split.by = "tech")

DimPlot(Neutrophils, reduction = "umap", split.by = "tech")

DimPlot(NK, reduction = "umap", split.by = "tech")

DimPlot(Plasma, reduction = "umap", split.by = "tech")

DimPlot(T, reduction = "umap", split.by = "tech")

markers.to.plot <- c("IL1A", "CXCL2","TNFAIP3","MAFF","PPP1R15A","NFKBIA","PTX3","CXCL3","CCL20","IFIT2","ARRDC3","EREG","ARSE","MSP2K6","DHCR7","UCP2","SLC25A10","VIL1","MCM5","DHCR24","SLC9A3R1","PFN1","TPPP3","DEGS2","RAB26")
DoHeatmap(subset(Basal,downsample=50000), features = markers.to.plot, size = 5, group.by="tech")
## Warning in DoHeatmap(subset(Basal, downsample = 50000), features =
## markers.to.plot, : The following features were omitted as they were not found
## in the scale.data slot for the integrated assay: RAB26, DEGS2, PFN1, SLC9A3R1,
## MCM5, VIL1, SLC25A10, DHCR7, MSP2K6, ARSE, ARRDC3

DoHeatmap(subset(Dendritic,downsample=50000), features = markers.to.plot, size = 5, group.by="tech")
## Warning in DoHeatmap(subset(Dendritic, downsample = 50000), features =
## markers.to.plot, : The following features were omitted as they were not found
## in the scale.data slot for the integrated assay: RAB26, DEGS2, PFN1, SLC9A3R1,
## MCM5, VIL1, SLC25A10, DHCR7, MSP2K6, ARSE, ARRDC3

DoHeatmap(subset(Macrophage,downsample=50000), features = markers.to.plot, size = 5, group.by="tech")
## Warning in DoHeatmap(subset(Macrophage, downsample = 50000), features =
## markers.to.plot, : The following features were omitted as they were not found
## in the scale.data slot for the integrated assay: RAB26, DEGS2, PFN1, SLC9A3R1,
## MCM5, VIL1, SLC25A10, DHCR7, MSP2K6, ARSE, ARRDC3

DoHeatmap(subset(Naive_CD4_T,downsample=50000), features = markers.to.plot, size = 5, group.by="tech")
## Warning in DoHeatmap(subset(Naive_CD4_T, downsample = 50000), features =
## markers.to.plot, : The following features were omitted as they were not found
## in the scale.data slot for the integrated assay: RAB26, DEGS2, PFN1, SLC9A3R1,
## MCM5, VIL1, SLC25A10, DHCR7, MSP2K6, ARSE, ARRDC3

DoHeatmap(subset(Neutrophils,downsample=50000), features = markers.to.plot, size = 5, group.by="tech")
## Warning in DoHeatmap(subset(Neutrophils, downsample = 50000), features =
## markers.to.plot, : The following features were omitted as they were not found
## in the scale.data slot for the integrated assay: RAB26, DEGS2, PFN1, SLC9A3R1,
## MCM5, VIL1, SLC25A10, DHCR7, MSP2K6, ARSE, ARRDC3

DoHeatmap(subset(NK,downsample=50000), features = markers.to.plot, size = 5, group.by="tech")
## Warning in DoHeatmap(subset(NK, downsample = 50000), features =
## markers.to.plot, : The following features were omitted as they were not found
## in the scale.data slot for the integrated assay: RAB26, DEGS2, PFN1, SLC9A3R1,
## MCM5, VIL1, SLC25A10, DHCR7, MSP2K6, ARSE, ARRDC3

DoHeatmap(subset(Plasma,downsample=50000), features = markers.to.plot, size = 5, group.by="tech")
## Warning in DoHeatmap(subset(Plasma, downsample = 50000), features =
## markers.to.plot, : The following features were omitted as they were not found
## in the scale.data slot for the integrated assay: RAB26, DEGS2, PFN1, SLC9A3R1,
## MCM5, VIL1, SLC25A10, DHCR7, MSP2K6, ARSE, ARRDC3

DoHeatmap(subset(T,downsample=50000), features = markers.to.plot, size = 5, group.by="tech")
## Warning in DoHeatmap(subset(T, downsample = 50000), features =
## markers.to.plot, : The following features were omitted as they were not found
## in the scale.data slot for the integrated assay: RAB26, DEGS2, PFN1, SLC9A3R1,
## MCM5, VIL1, SLC25A10, DHCR7, MSP2K6, ARSE, ARRDC3

RidgePlot(Basal, features = c("IL1A", "CXCL2","TNFAIP3","MAFF","PPP1R15A","NFKBIA","PTX3","CXCL3","CCL20","IFIT2","EREG","UCP2","DHCR24","TPPP3"),cols = c("green3","cornflowerblue","orangered"), group.by="tech", ncol = 3) + theme(axis.title.y = element_blank())
## Picking joint bandwidth of 0.00325
## Picking joint bandwidth of 0.0474
## Picking joint bandwidth of 0.117
## Picking joint bandwidth of 0.0865
## Picking joint bandwidth of 0.184
## Picking joint bandwidth of 0.17
## Picking joint bandwidth of 0.00184
## Picking joint bandwidth of 0.0186
## Picking joint bandwidth of 0.0215
## Picking joint bandwidth of 0.268
## Picking joint bandwidth of 0.0309
## Picking joint bandwidth of 0.153
## Picking joint bandwidth of 0.0523
## Picking joint bandwidth of 0.216

RidgePlot(Dendritic, features = c("IL1A", "CXCL2","TNFAIP3","MAFF","PPP1R15A","NFKBIA","PTX3","CXCL3","CCL20","IFIT2","EREG","UCP2","DHCR24","TPPP3"),cols = c("green3","cornflowerblue","orangered"), group.by="tech", ncol = 3) + theme(axis.title.y = element_blank())
## Picking joint bandwidth of 0.00878
## Picking joint bandwidth of 0.0276
## Picking joint bandwidth of 0.0504
## Picking joint bandwidth of 0.0327
## Picking joint bandwidth of 0.0744
## Picking joint bandwidth of 0.185
## Picking joint bandwidth of 0.00438
## Picking joint bandwidth of 0.0285
## Picking joint bandwidth of 0.0113
## Picking joint bandwidth of 0.22
## Picking joint bandwidth of 0.0226
## Picking joint bandwidth of 0.139
## Picking joint bandwidth of 0.00564
## Picking joint bandwidth of 0.0065

RidgePlot(Macrophage, features = c("IL1A", "CXCL2","TNFAIP3","MAFF","PPP1R15A","NFKBIA","PTX3","CXCL3","CCL20","IFIT2","EREG","UCP2","DHCR24","TPPP3"), cols = c("green3","cornflowerblue","orangered"),group.by="tech", ncol = 3) + theme(axis.title.y = element_blank())
## Picking joint bandwidth of 0.0032
## Picking joint bandwidth of 0.0547
## Picking joint bandwidth of 0.0595
## Picking joint bandwidth of 0.0232
## Picking joint bandwidth of 0.0672
## Picking joint bandwidth of 0.0975
## Picking joint bandwidth of 0.0025
## Picking joint bandwidth of 0.0244
## Picking joint bandwidth of 0.00684
## Picking joint bandwidth of 0.118
## Picking joint bandwidth of 0.027
## Picking joint bandwidth of 0.0945
## Picking joint bandwidth of 0.00494
## Picking joint bandwidth of 0.00208

RidgePlot(Naive_CD4_T, features = c("IL1A", "CXCL2","TNFAIP3","MAFF","PPP1R15A","NFKBIA","PTX3","CXCL3","CCL20","IFIT2","EREG","UCP2","DHCR24","TPPP3"), cols = c("green3","cornflowerblue","orangered"),group.by="tech", ncol = 3) + theme(axis.title.y = element_blank())
## Picking joint bandwidth of 0.000519
## Picking joint bandwidth of 0.007
## Picking joint bandwidth of 0.137
## Picking joint bandwidth of 0.0131
## Picking joint bandwidth of 0.105
## Picking joint bandwidth of 0.151
## Picking joint bandwidth of 0.00046
## Picking joint bandwidth of 0.00276
## Picking joint bandwidth of 0.0178
## Picking joint bandwidth of 0.156
## Picking joint bandwidth of 0.003
## Picking joint bandwidth of 0.094
## Picking joint bandwidth of 0.00795
## Picking joint bandwidth of 0.00122

RidgePlot(Neutrophils, features = c("IL1A", "CXCL2","TNFAIP3","MAFF","PPP1R15A","NFKBIA","PTX3","CXCL3","CCL20","IFIT2","EREG","UCP2","DHCR24","TPPP3"), cols = c("green3","cornflowerblue","orangered"),group.by="tech", ncol = 3) + theme(axis.title.y = element_blank())
## Picking joint bandwidth of 0.00741
## Picking joint bandwidth of 0.125
## Picking joint bandwidth of 0.174
## Picking joint bandwidth of 0.099
## Picking joint bandwidth of 0.208
## Picking joint bandwidth of 0.296
## Picking joint bandwidth of 0.00974
## Picking joint bandwidth of 0.145
## Picking joint bandwidth of 0.0618
## Picking joint bandwidth of 0.241
## Picking joint bandwidth of 0.0378
## Picking joint bandwidth of 0.0676
## Picking joint bandwidth of 0.00527
## Picking joint bandwidth of 0.00183

RidgePlot(NK, features = c("IL1A", "CXCL2","TNFAIP3","MAFF","PPP1R15A","NFKBIA","PTX3","CXCL3","CCL20","IFIT2","EREG","UCP2","DHCR24","TPPP3"), cols = c("green3","cornflowerblue","orangered"),group.by="tech", ncol = 3) + theme(axis.title.y = element_blank())
## Picking joint bandwidth of 0.000989
## Picking joint bandwidth of 0.00466
## Picking joint bandwidth of 0.127
## Picking joint bandwidth of 0.0269
## Picking joint bandwidth of 0.0874
## Picking joint bandwidth of 0.123
## Picking joint bandwidth of 0.00018
## Picking joint bandwidth of 0.00249
## Picking joint bandwidth of 0.00739
## Picking joint bandwidth of 0.144
## Picking joint bandwidth of 0.00219
## Picking joint bandwidth of 0.115
## Picking joint bandwidth of 0.0115
## Picking joint bandwidth of 0.00101

RidgePlot(Plasma, features = c("IL1A", "CXCL2","TNFAIP3","MAFF","PPP1R15A","NFKBIA","PTX3","CXCL3","CCL20","IFIT2","EREG","UCP2","DHCR24","TPPP3"), cols = c("green3","cornflowerblue","orangered"),group.by="tech", ncol = 3) + theme(axis.title.y = element_blank())
## Picking joint bandwidth of 0.00359
## Picking joint bandwidth of 0.016
## Picking joint bandwidth of 0.0848
## Picking joint bandwidth of 0.0223
## Picking joint bandwidth of 0.124
## Picking joint bandwidth of 0.171
## Picking joint bandwidth of 0.00139
## Picking joint bandwidth of 0.00849
## Picking joint bandwidth of 0.00974
## Picking joint bandwidth of 0.217
## Picking joint bandwidth of 0.00637
## Picking joint bandwidth of 0.162
## Picking joint bandwidth of 0.019
## Picking joint bandwidth of 0.0038

RidgePlot(T, features = c("IL1A", "CXCL2","TNFAIP3","MAFF","PPP1R15A","NFKBIA","PTX3","CXCL3","CCL20","IFIT2","EREG","UCP2","DHCR24","TPPP3"), cols = c("green3","cornflowerblue","orangered"),group.by="tech", ncol = 3) + theme(axis.title.y = element_blank())
## Picking joint bandwidth of 0.00406
## Picking joint bandwidth of 0.0337
## Picking joint bandwidth of 0.115
## Picking joint bandwidth of 0.036
## Picking joint bandwidth of 0.127
## Picking joint bandwidth of 0.159
## Picking joint bandwidth of 0.00745
## Picking joint bandwidth of 0.0229
## Picking joint bandwidth of 0.0128
## Picking joint bandwidth of 0.196
## Picking joint bandwidth of 0.0213
## Picking joint bandwidth of 0.139
## Picking joint bandwidth of 0.0111
## Picking joint bandwidth of 0.00282